# Replication Package for
# "Bias and Overconfidence in Parametric Models of Interactive Processes"
# by William Berry, Jacqueline H. R. Demeritt, and Justin Esarey
#
# Continuous independent variable analysis
# 
# Package prepared by J. Esarey on 3/25/2015


# NEEDED PACKAGES
# in R: foreign, locfit, ROCR, mvtnorm, snow, rlecuyer, sfsmisc, VGAM
# in Stata: mat2txt and hl (latter from http://www.homepages.ucl.ac.uk/~ucakgam/stata/)

# uncomment next line if packages need to be installed
# install.packages(c("locfit", "ROCR", "mvtnorm", "snow", "rlecuyer", "sfsmisc", "VGAM"), dep=T)

# This replication file exists in five parts:

# (1) generate monte carlo data sets from 155 sample DGPs
# (2) estimate models on the monte carlo data sets & save results (takes a long time)
# (3) conduct summary analysis on model results
# (4) conduct size/power analysis for product term probit & logit models
# (5) make drawings of the DGPs.


rm(list=ls())
# set directory to the location of this replication file
setwd("J:/Dropbox/jesarey_documents/BRE Logit Misspecification Paper/Final Replication Package")


###############################################################
# Part One: generate Monte Carlo Data Sets
# if data has not already been drawn, uncomment line
# to create the Monte Carlo data sets
###############################################################
# source(file="draw_mc_data.r")






###############################################################
# Part Two: estimate models on MC data sets and save results
# note: this is very time consuming!
###############################################################

rm(list=ls())
set.seed(123456)

# Set number of samples to draw
reps<-25

# Load necessary libraries
require(foreign) 
require(locfit)
require(ROCR)
require(mvtnorm)
require(snow)
require(sfsmisc)
require(VGAM)

# start up cluster
NODES<-12 #number of cluster nodes to establish
cl <- makeCluster(rep("localhost", NODES), type = "SOCK")
clusterSetupRNG (cl, type = "RNGstream", seed=rep(398123, 6))

invisible(clusterEvalQ(cl, library(VGAM)))


# Load functions to report true Probability y=1

trupry<-function(r,X,Z){
  
  if(r==85) {
    trupry1 = (((X -1)^2 / -10) + .9) + ((((Z + .75)^2 / 2.5) -.225) *((((X -1.250000075)^2 / -3.750000375) + 0.516666675) - (((X -1)^2 / -10) + .9)))
    return(trupry1)}
  
  if (r==82) {
    trupry1 = (((X -1)^2 / 10) + .3) + ((((Z - 1.125)^2 / -1.25) + 1.0125) *((((X -1)^2 / 2.5) + .5) - (((X -1)^2 / 10) + .3)))
    return(trupry1)}
  
  if(r==86) {
    trupry1 = (((X + 0)^2 / 2) + 0.4) + ((((Z + .125)^2 / 1.25) - .0125) *((((X + 0)^2 / 5) + 0.1) - (((X + 0)^2 / 2) + 0.4)))
    return(trupry1)}
  
  if(r==87) {
    trupry1 = ((1-Z)*((((X + 0.2499994)^2 / 2.9999976) + 0.27916675))) + ((Z)*(((X + 0)^2 / 5 + 0.1)))
    return(trupry1)}
  
  if(r==88) {
    trupry1 = (((X - 1.25)^2 / -5) + 0.9125) + ((((Z - 1.125)^2 / -1.25) + 1.0125) *((((X - 1.25)^2 / -2.5) + 0.825) - (((X - 1.25)^2 / -5) + 0.9125)))
    return(trupry1)}
  
  if(r==80) {
    trupry1 = ((1-Z)*((((X -1.249785775)^2 / 2.142245073) + 0.170874979))) + ((Z)*(((X -1.246268657)^2 / 3.731343284 + 0.083746269)))
    return(trupry1)}
  
  if(r==100) {
    trupry1 = (((X -1)^2 / -2) + 0.8) + ((((Z + .125)^2 / 1.25) - .0125) *((((X -1.248502994)^2 / -14.97005988) + 0.90412515) - (((X -1)^2 / -2) + 0.8)))
    return(trupry1)}
  
  if(r==101) {
    trupry1 = (((X + 0)^2 / 10) + 0.2) + ((((Z - 1.125)^2 / -1.25) + 1.0125) *((((X + 0.25030012)^2 / 3.00120048) + 0.37912497) - (((X + 0)^2 / 10) + 0.2)))
    return(trupry1)}
  
  if(r==96) {
    trupry1 = (((X + 2.77556E-16)^2 / -5) + 0.9) + ((((Z + .75)^2 / 2.5) - .225) *((((X + 0)^2 / -1.666666667) + 0.8) - (((X + 2.77556E-16)^2 / -5) + 0.9)))
    return(trupry1)}
  
  if(r==97) {
    trupry1 = (((X -1)^2 / 1.428571429) + 0.2) + ((((Z - 1.75)^2 / -2.5) + 1.225) *((((X - 1.25)^2 / 5) + 0.0875) - (((X -1)^2 / 1.428571429) + 0.2)))
    return(trupry1)}
  
  if(r==111) {
    trupry1 = (((X + 0.25)^2 / -2.5) + 0.725) + ((((Z + .75)^2 / 2.5) - .225) *((((X + 5.55112E-16)^2 / -10) + 0.9) - (((X + 0.25)^2 / -2.5) + 0.725)))
    return(trupry1)}
  
  if(r==112) {
    trupry1 = (((X + 0)^2 / -1.428571429) + 0.8) + ((((Z - 1.75)^2 / -2.5) + 1.225) *((((X + 0.257575758)^2 / -7.575757576) + 0.908757576) - (((X + 0)^2 / -1.428571429) + 0.8)))
    return(trupry1)}
  
  if(r==89) {
    trupry1 = (((X + 0)^2 / 10) + 0.7) + ((((Z + .125)^2 / 1.25) - .0125)*((((X + 0.249625187)^2 / 3.748125937) + 0.083374963) - (((X + 0)^2 / 10) + 0.7)))
    return(trupry1)}
  
  if(r==83) {
    trupry1 = (((X + 5.55112E-16)^2 / -10) + 0.5) + ((((Z - 1.75)^2 / -2.5) + 1.225)*((((X + 0)^2 / -2.5) + 0.9) - (((X + 5.55112E-16)^2 / -10) + 0.5)))
    return(trupry1)}
  
  if(r==81) {
    trupry1 = (((X -1.249999985)^2 / 7.49999985) + 0.691666668) + ((((Z + .125)^2 / 1.25) - .0125) *((((X - 1)^2 / 2) + 0.3) - (((X -1.249999985)^2 / 7.49999985) + 0.691666668)))
    return(trupry1)}
  
  if(r==90) {
    trupry1 = (((X -1.253012048)^2 / -3.012048193) + 0.921253012) + ((((Z - 1.75)^2 / -2.5) + 1.225) *((((X -1.257575758)^2 / -7.575757576) + 0.508757576) - (((X -1.253012048)^2 / -3.012048193) + 0.921253012)))
    return(trupry1)}
  
  if(r==91) {
    trupry1 = (((X - 1.25)^2 / -2.5) + 0.925) + ((((Z + .75)^2 / 2.5) - .225) *((((X - 1.25)^2 / -5) + 0.5125) - (((X - 1.25)^2 / -2.5) + 0.925)))
    return(trupry1)}
  
  if(r==84) {
    trupry1 = (((X -1.247863248)^2 / 2.136752137) + 0.071247863) + ((((Z - 1.75)^2 / -2.5) + 1.225)*((((X -1.246268657)^2 / 3.731343284) + 0.483746269) - (((X -1.247863248)^2 / 2.136752137) + 0.071247863)))
    return(trupry1)}
  
  if(r==98) {
    trupry1 = (((X - 1)^2 / 10) + 0.8) + ((((Z + .125)^2 / 1.25) - .0125)*((((X -1.25030012)^2 / 3.00120048) + 0.07912497) - (((X - 1)^2 / 10) + 0.8)))
    return(trupry1)}
  
  if(r==99) {
    trupry1 = ((1-Z)*((((X - 1)^2 / -2) + 0.8))) + ((Z)*(((X - 1)^2 / -10 + 0.3)))
    return(trupry1)}
  
  if(r==102) {
    trupry1 = ((1-Z)*((((X + 0)^2 / 1.666666667) + 0.1))) + ((Z)*(((X + 0.250750751)^2 / 7.507507508 + 0.691624925)))
    return(trupry1)}
  
  if(r==103) {
    trupry1 = (((X + 0.249785775)^2 / 2.142245073) + 0.070874979) + ((((Z - 1.125)^2 / -1.25) + 1.0125) *((((X + 0.25)^2 / 5) + 0.5875) - (((X + 0.249785775)^2 / 2.142245073) + 0.070874979)))
    return(trupry1)}
  
  if(r==113) {
    trupry1 = ((1-Z)*((((X -5.55112E-16)^2 / -10) + 0.3))) + ((Z)*(((X + 0)^2 / -1.666666667 + 0.9)))
    return(trupry1)}
  
  if(r==108) {
    trupry1 = (((X + 0)^2 / -1.428571429) + 0.9) + ((((Z + .75)^2 / 2.5) - .225)*((((X + 0.257575758)^2 / -7.575757576) + 0.308757576) - (((X + 0)^2 / -1.428571429) + 0.9)))
    return(trupry1)}
  
  if(r==77) {
    trupry1 = 0.2+(0.1*X)+(0.2*Z)+(0.3*X*Z)
    return(trupry1)}
  
  if(r==74) {
    trupry1 = 0.2+(-0.1*X)+(0.4*Z)+(-0.3*X*Z)
    return(trupry1)}
  
  if(r==75) {
    trupry1 = 0.2+(0.2*X)+(0.1*Z)+(0.3*X*Z)
    return(trupry1)}
  
  if(r==76) {
    trupry1 = 0.6+(0.2*X)+(-0.5*Z)+(0.3*X*Z)
    return(trupry1)}
  
  if(r==78) {
    trupry1 = 0.1+(0.6*X)+(0.5*Z)+(-0.3*X*Z)
    return(trupry1)}
  
  if(r==79) {
    trupry1 = 0.1+(0.7*X)+(0.4*Z)+(-0.3*X*Z)
    return(trupry1)}
  
  if(r==93) {
    trupry1 = 0.4+(-0.1*X)+(0.5*Z)+(-0.4*X*Z)
    return(trupry1)}
  
  if(r==94) {
    trupry1 = 0.7+(-0.5*X)+(0.2*Z)+(0.4*X*Z)
    return(trupry1)}
  
  if(r==95) {
    trupry1 = 0.3+(0.6*X)+(-0.1*Z)+(-0.4*X*Z)
    return(trupry1)}
  
  if(r==92) {
    trupry1 = 0.9+(-0.3*X)+(-0.1*Z)+(-0.4*X*Z)
    return(trupry1)}
  
  if(r==105) {
    trupry1 = 0.8+(-0.6*X)+(0.1*Z)+(0.5*X*Z)
    return(trupry1)}
  
  if(r==104) {
    trupry1 = 0.9+(-0.7*X)+(-0.6*Z)+(0.5*X*Z)
    return(trupry1)}
  
  if(r==48) {
    trupry1 = (((X + 0)^2 / 3.333333333) + 0.3) + ((((Z + .75)^2 / 2.5) - .225)*((((X + 3.88578E-16)^2 / 3.333333333) + 0.6) - (((X + 0)^2 / 3.333333333) + 0.3)))
    return(trupry1)}
  
  if(r==40) {
    trupry1 = (((X - 1)^2 / 3.333333333) + 0.5) + ((((Z + .125)^2 / 1.25) - .0125)*((((X - 1)^2 / 3.333333333) + 0.2) - (((X - 1)^2 / 3.333333333) + 0.5)))
    return(trupry1)}
  
  if(r==41) {
    trupry1 = (((X - 1.25)^2 / 5) + 0.3875) + ((((Z + .125)^2 / 1.25) - .0125)*((((X - 1.25)^2 / 5) + 0.0875) - (((X - 1.25)^2 / 5) + 0.3875)))
    return(trupry1)}
  
  if(r==43) {
    trupry1 = (Z*((((X + 0)^2 / -3.333333333) + 0.4))) + ((1-Z)*(((X + 3.88578E-16)^2 / -3.333333333 + 0.8)))
    return(trupry1)}
  
  if(r==42) {
    trupry1 = (((X + 0.25)^2 / -5) + 0.9125) + ((((Z - 1.125)^2 / -1.25) + 1.0125)*((((X + 0.25)^2 / -5) + 0.5125) - (((X + 0.25)^2 / -5) + 0.9125)))
    return(trupry1)}
  
  if(r==46) {
    trupry1 = (Z*((((X + 3.88578E-16)^2 / 3.333333333) + 0.6))) + ((1-Z)*(((X + 0)^2 / 3.333333333 + 0.1)))
    return(trupry1)}
  
  if(r==49) {
    trupry1 = (((X - 1)^2 / -2.5) + 0.5) + ((((Z - 1.125)^2 / -1.25) + 1.0125)*((((X - 1)^2 / -2.5) + 0.8) - (((X - 1)^2 / -2.5) + 0.5)))
    return(trupry1)}
  
  if(r==47) {
    trupry1 = (((X -1.249625187)^2 / -3.748125937) + 0.916625037) + ((((Z - 1.75)^2 / -2.5) + 1.225)*((((X -1.249625187)^2 / -3.748125937) + 0.616625037) - (((X -1.249625187)^2 / -3.748125937) + 0.916625037)))
    return(trupry1)}
  
  if(r==44) {
    trupry1 = (((X -1.249625187)^2 / 3.748125937) + 0.083374963) + ((((Z + .75)^2 / 2.5) - .225)*((((X -1.249625187)^2 / 3.748125937) + 0.483374963) - (((X -1.249625187)^2 / 3.748125937) + 0.083374963)))
    return(trupry1)}
  
  if(r==45) {
    trupry1 = (((X + 0.25030012)^2 / -3.00120048) + 0.62087503) + ((((Z - 1.75)^2 / -2.5) + 1.225)*((((X + 0.25030012)^2 / -3.00120048) + 0.92087503) - (((X + 0.25030012)^2 / -3.00120048) + 0.62087503)))
    return(trupry1)}
  
  if(r==17) {
    trupry1 = 0.4+(-0.3*X)+(0.3*Z)+(0*X*Z)
    return(trupry1)}
  
  if(r==18) {
    trupry1 = 0.5+(-0.3*X)+(0.3*Z)+(0*X*Z)
    return(trupry1)}
  
  if(r==19) {
    trupry1 = 0.6+(0.3*X)+(-0.3*Z)+(0*X*Z)
    return(trupry1)}
  
  if(r==22) {
    trupry1 = 0.2+(0.3*X)+(0.4*Z)+(0*X*Z)
    return(trupry1)}
  
  if(r==20) {
    trupry1 = 0.6+(0.3*X)+(-0.4*Z)+(0*X*Z)
    return(trupry1)}
  
  if(r==23) {
    trupry1 = 0.1+(0.3*X)+(0.5*Z)+(0*X*Z)
    return(trupry1)}
  
  if(r==21) {
    trupry1 = 0.5+(0.4*X)+(-0.3*Z)+(0*X*Z)
    return(trupry1)}
  
  if(r==15) {
    trupry1 = 0.9+(-0.4*X)+(-0.3*Z)+(0*X*Z)
    return(trupry1)}
  
  if(r==24) {
    trupry1 = 0.1+(0.4*X)+(0.4*Z)+(0*X*Z)
    return(trupry1)}
  
  if(r==16) {
    trupry1 = 0.9+(-0.5*X)+(-0.3*Z)+(0*X*Z)
    return(trupry1)}
  
  if(r==59) {
    trupry1 = (((X+0)^2 / 3.333333) + 0.3) + ((((Z + .75)^2 / 2.5) - .225) * ((((X + .249962502)^2 / 3.749812509) + 0.4833375) - (((X + 0)^2 / 3.333333) + 0.3)))
    return(trupry1)}
  
  if(r==54) {
    trupry1 = (((X + -1)^2 / 3.33333333) + 0.5) + ((((Z + .125)^2 / 1.25) - .0125) * ((((X + -1)^2 / 5) + 0.2) - (((X + -1)^2 / 3.33333333) + 0.5)))
    return(trupry1)}
  
  if(r==55) {
    trupry1 = (((X + -1.25)^2 / 5) + 0.3875) + ((((Z + 0.125)^2 / 1.25) - 0.0125) * ((((X + -1)^2 / 5) + 0.2) - (((X + -1.25)^2 / 5) + 0.3875)))
    return(trupry1)}
  
  if(r==56) {
    trupry1 = (Z*(((X + 0)^2 / -3.33333333) + 0.4)) + ((1-Z)*(((X + 4.44089e-16)^2 / -2.5) + 0.8))
    return(trupry1)}
  
  if(r==60) {
    trupry1 = (Z*((((X + .25)^2 / 5) + 0.5875))) + ((1-Z)*(((X + 0)^2 / 2.5 + 0.1)))
    return(trupry1)}
  
  if(r==61) {
    trupry1 = (((X - 1)^2 / -2.5) + 0.5) + ((((Z - 1.125)^2 / -1.25) + 1.0125)*((((X - 1)^2 / -3.33333333) + 0.8) - (((X - 1)^2 / -2.5) + 0.5)))
    return(trupry1)}
  
  if(r==58) {
    trupry1 = (((X -1)^2 / -2) + 0.9) + ((((Z - 1.75)^2 / -2.5) + 1.225)*((((X -1.249625187)^2 / -3.748125937) + 0.616625037) - (((X -1)^2 / -2) + 0.9)))
    return(trupry1)}
  
  if(r==57) {
    trupry1 = (((X + 0.25)^2 / -2.5) + 0.725) + ((((Z - 1.75)^2 / -2.5) + 1.225)*((((X + 0.25030012)^2 / -3.00120048) + 0.92087503) - (((X + 0.25)^2 / -2.5) + 0.725)))
    return(trupry1)}
  
  if(r==51) {
    trupry1 = 0.5+(-0.3*X)+(0.3*Z)+(.1*X*Z)
    return(trupry1)}
  
  if(r==52){
    trupry1 = 0.6+(0.3*X)+(-0.4*Z)+(0.1*X*Z)
    return(trupry1)}
  
  if(r==53){
    trupry1 = 0.1+(0.3*X)+(0.5*Z)+(-0.1*X*Z)
    return(trupry1)}
  
  if(r==50){
    trupry1 = 0.9+(-0.4*X)+(-0.3*Z)+(0.1*X*Z)
    return(trupry1)}
  
  if(r==66){
    trupry1 = (((X - 1)^2 / 5) + 0.3) + ((((Z - 1.125)^2 / -1.25) + 1.0125) *((((X -1)^2 / 2.5) + .5) - (((X -1)^2 / 5) + .3)))
    return(trupry1)}
  
  if(r==69){
    trupry1 = (((X + 0)^2 / 2.5) + 0.5) + ((((Z + .125)^2 / 1.25) - .0125) *((((X + 0)^2 / 5) + 0.1) - (((X + 0)^2 / 2.5) + 0.5)))
    return(trupry1)}
  
  if(r==70){
    trupry1 = ((1-Z)*((((X + 0.2499994)^2 / 2.9999976) + 0.27916675))) + ((Z)*(((X + 0)^2 / 3.33333333 + 0.1)))
    return(trupry1)}
  
  if(r==71){
    trupry1 = (((X - 1.25)^2 / -5) + 0.9125) + ((((Z - 1.125)^2 / -1.25) + 1.0125) *((((X - 1.25030012)^2 / -3.00120048) + 0.82087503) - (((X - 1.25)^2 / -5) + 0.9125)))
    return(trupry1)}
  
  if(r==72){
    trupry1 = (((X + 0)^2 / 10) + 0.7) + ((((Z + .125)^2 / 1.25) - .0125)*((((X + 0.25)^2 / 5) + 0.1875) - (((X + 0)^2 / 10) + 0.7)))
    return(trupry1)}
  
  if(r==67){
    trupry1 = (((X + 0)^2 / -5) + 0.5) + ((((Z - 1.75)^2 / -2.5) + 1.225)*((((X + 0)^2 / -2.5) + 0.9) - (((X + 0)^2 / -5) + 0.5)))
    return(trupry1)}
  
  if(r==73){
    trupry1 = (((X -1.253012048)^2 / -3.012048193) + 0.921253012) + ((((Z - 1.75)^2 / -2.5) + 1.225) *((((X -1.25)^2 / -5) + 0.6125) - (((X -1.253012048)^2 / -3.012048193) + 0.921253012)))
    return(trupry1)}
  
  if(r==68){
    trupry1 = (((X -1.247863248)^2 / 2.136752137) + 0.071247863) + ((((Z - 1.75)^2 / -2.5) + 1.225)*((((X -1.25030012)^2 / 3.00120048) + 0.37912497) - (((X -1.247863248)^2 / 2.136752137) + 0.071247863)))
    return(trupry1)}
  
  if(r==64){
    trupry1 = 0.2+(0.1*X)+(0.3*Z)+(0.2*X*Z)
    return(trupry1)}
  
  if(r==62){
    trupry1 = 0.2+(-0.1*X)+(0.3*Z)+(-0.2*X*Z)
    return(trupry1)}
  
  if(r==65){
    trupry1 = 0.2+(0.2*X)+(0.1*Z)+(0.2*X*Z)
    return(trupry1)}
  
  if(r==63){
    trupry1 = 0.5+(0.2*X)+(-0.4*Z)+(0.2*X*Z)
    return(trupry1)}
  
  if(r==115){
    trupry1 = (((X -1)^2 / -1.66666667) + 0.8) + ((((Z + .125)^2 / 1.25) - .0125) *((((X -1.248502994)^2 / -14.97005988) + 0.90412515) - (((X -1)^2 / -1.66666667) + 0.8)))
    return(trupry1)}
  
  if(r==109){
    trupry1 = (((X -1)^2 / 1.428571429) + 0.2) + ((((Z - 1.75)^2 / -2.5) + 1.225) *((((X - 1.250750751)^2 / 7.507507508) + 0.091624925) - (((X -1)^2 / 1.428571429) + 0.2)))
    return(trupry1)}
  
  if(r==110){
    trupry1 = (((X - 1)^2 / 10) + 0.8) + ((((Z + .125)^2 / 1.25) - .0125)*((((X -1.25)^2 / 2.5) + 0.075) - (((X - 1)^2 / 10) + 0.8)))
    return(trupry1)}
  
  if(r==114){
    trupry1 = ((1-Z)*((((X - 1)^2 / -1.66666667) + 0.9))) + ((Z)*(((X - 1)^2 / -10 + 0.3)))
    return(trupry1)}
  
  if(r==106){
    trupry1 = 0.7+(-0.6*X)+(0.2*Z)+(0.5*X*Z)
    return(trupry1)}
  
  if(r==107){
    trupry1 = 0.3+(0.6*X)+(-0.1*Z)+(-0.5*X*Z)
    return(trupry1)}
  
  
  if(r==1){
    trupry1=0.1+0*X+0*Z
    return(trupry1)}
  
  if(r==116){
    trupry1=(.1+.1)-(Z*.1)+0*X
    return(trupry1)}
  
  if(r==117){
    trupry1=.1+(Z*.2)+0*X
    return(trupry1)}
  
  if(r==118){
    trupry1=.1+(Z*.3)+0*X
    return(trupry1)}
  
  if(r==13){
    trupry1=.1+(Z*.4)+0*X
    return(trupry1)}
  
  if(r==119){
    trupry1=(.1+.5)-(Z*.5)+0*X
    return(trupry1)}
  
  if(r==2){
    trupry1=0.2+0*X+0*Z
    return(trupry1)}
  
  if(r==120){
    trupry1=(.2+.1)-(Z*.1)+0*X
    return(trupry1)}
  
  if(r==121){
    trupry1=(.2+.2)-(Z*.2)+0*X
    return(trupry1)}
  
  if(r==14){
    trupry1=.2+(Z*.3)+0*X
    return(trupry1)}
  
  if(r==122){
    trupry1=.2+(Z*.4)+0*X
    return(trupry1)}
  
  if(r==123){
    trupry1=(.2+.5)-(Z*.5)+0*X
    return(trupry1)}
  
  if(r==3){
    trupry1=0.3+0*X+0*Z
    return(trupry1)}
  
  if(r==124){
    trupry1=.3+(Z*.1)+0*X
    return(trupry1)}
  
  if(r==125){
    trupry1=.3+(Z*.2)+0*X
    return(trupry1)}
  
  if(r==126){
    trupry1=.3+(Z*.3)+0*X
    return(trupry1)}
  
  if(r==10){
    trupry1=(.3+.4)-(Z*.4)+0*X
    return(trupry1)}
  
  if(r==127){
    trupry1=(.3+.5)-(Z*.5)+0*X
    return(trupry1)}
  
  if(r==4){
    trupry1=0.4+0*X+0*Z
    return(trupry1)}
  
  if(r==128){
    trupry1=(.4+.1)-(Z*.1)+0*X
    return(trupry1)}
  
  if(r==129){
    trupry1=.4+(Z*.2)+0*X
    return(trupry1)}
  
  if(r==130){
    trupry1=.4+(Z*.3)+0*X
    return(trupry1)}
  
  if(r==11){
    trupry1=(.4+.4)-(Z*.4)+0*X
    return(trupry1)}
  
  if(r==131){
    trupry1=.4+(Z*.5)+0*X
    return(trupry1)}
  
  if(r==5){
    trupry1=0.5+0*X+0*Z
    return(trupry1)}
  
  if(r==12){
    trupry1=(.5+.1)-(Z*.1)+0*X
    return(trupry1)}
  
  if(r==132){
    trupry1=(.5+.2)-(Z*.2)+0*X
    return(trupry1)}
  
  if(r==133){
    trupry1=(.5+.3)-(Z*.3)+0*X
    return(trupry1)}
  
  if(r==134){
    trupry1=.5+(Z*.4)+0*X
    return(trupry1)}
  
  if(r==6){
    trupry1=0.6+0*X+0*Z
    return(trupry1)}
  
  if(r==135){
    trupry1=.6+(Z*.1)+0*X
    return(trupry1)}
  
  if(r==136){
    trupry1=(.6+.2)-(Z*.2)+0*X
    return(trupry1)}
  
  if(r==137){
    trupry1=.6+(Z*.3)+0*X
    return(trupry1)}
  
  if(r==7){
    trupry1=0.7+0*X+0*Z
    return(trupry1)}
  
  if(r==138){
    trupry1=.7+(Z*.1)+0*X
    return(trupry1)}
  
  if(r==139){
    trupry1=(.7+.2)-(Z*.2)+0*X
    return(trupry1)}
  
  if(r==8){
    trupry1=0.8+0*X+0*Z
    return(trupry1)}
  
  if(r==140){
    trupry1=(.8+.1)-(Z*.1)+0*X
    return(trupry1)}
  
  if(r==9){
    trupry1=0.9+0*X+0*Z
    return(trupry1)}
  
  if(r==25){
    trupry1=(.1+.1)-(((((Z-1.125)^2)/-1.25)+1.0125)*.1)+0*X
    return(trupry1)}
  
  if(r==141){
    trupry1=.1+(((((Z+.125)^2)/1.25)-.0125)*.2)+0*X
    return(trupry1)}
  
  if(r==33){
    trupry1=.1+(((((Z+.125)^2)/1.25)-.0125)*.3)+0*X
    return(trupry1)}
  
  if(r==142){
    trupry1=.1+(((((Z+.75)^2)/2.5)-.225)*.4)+0*X
    return(trupry1)}
  
  if(r==143){
    trupry1=(.1+.5)-(((((Z-1.75)^2)/-2.5)+1.225)*.5)+0*X
    return(trupry1)}
  
  if(r==26){
    trupry1=(.2+.1)-(((((Z-1.75)^2)/-2.5)+1.225)*.1)+0*X
    return(trupry1)}
  
  if(r==27){
    trupry1=(.2+.2)-(((((Z-1.75)^2)/-2.5)+1.225)*.2)+0*X
    return(trupry1)}
  
  if(r==144){
    trupry1=.2+(((((Z+.75)^2)/2.5)-.225)*.3)+0*X
    return(trupry1)}
  
  if(r==145){
    trupry1=.2+(((((Z+.125)^2)/1.25)-.0125)*.4)+0*X
    return(trupry1)}
  
  if(r==28){
    trupry1=(.2+.5)-(((((Z-1.75)^2)/-2.5)+1.225)*.5)+0*X
    return(trupry1)}
  
  if(r==146){
    trupry1=.3+(((((Z+.125)^2)/1.25)-.0125)*.1)+0*X
    return(trupry1)}
  
  if(r==147){
    trupry1=.3+(((((Z+.125)^2)/1.25)-.0125)*.2)+0*X
    return(trupry1)}
  
  if(r==34){
    trupry1=.3+(((((Z+.125)^2)/1.25)-.0125)*.3)+0*X
    return(trupry1)}
  
  if(r==148){
    trupry1=(.3+.4)-(((((Z-1.125)^2)/-1.25)+1.0125)*.4)+0*X
    return(trupry1)}
  
  if(r==29){
    trupry1=(.3+.5)-(((((Z+.75)^2)/2.5)-.225)*.5)+0*X
    return(trupry1)}
  
  if(r==30){
    trupry1=(.4+.1)-(((((Z-1.125)^2)/-1.25)+1.0125)*.1)+0*X
    return(trupry1)}
  
  if(r==35){
    trupry1=.4+(((((Z+.75)^2)/2.5)-.225)*.2)+0*X
    return(trupry1)}
  
  if(r==149){
    trupry1=.4+(((((Z-1.125)^2)/-1.25)+1.0125)*.3)+0*X
    return(trupry1)}
  
  if(r==150){
    trupry1=(.4+.4)-(((((Z+.75)^2)/2.5)-.225)*.4)+0*X
    return(trupry1)}
  
  if(r==36){
    trupry1=.4+(((((Z-1.75)^2)/-2.5)+1.225)*.5)+0*X
    return(trupry1)}
  
  if(r==151){
    trupry1=(.5+.1)-(((((Z+.75)^2)/2.5)-.225)*.1)+0*X
    return(trupry1)}
  
  if(r==31){
    trupry1=(.5+.2)-(((((Z+.75)^2)/2.5)-.225)*.2)+0*X
    return(trupry1)}
  
  if(r==152){
    trupry1=(.5+.3)-(((((Z+.75)^2)/2.5)-.225)*.3)+0*X
    return(trupry1)}
  
  if(r==37){
    trupry1=.5+(((((Z-1.125)^2)/-1.25)+1.0125)*.4)+0*X
    return(trupry1)}
  
  if(r==153){
    trupry1=.6+(((((Z-1.125)^2)/-1.25)+1.0125)*.1)+0*X
    return(trupry1)}
  
  if(r==154){
    trupry1=(.6+.2)-(((((Z+.125)^2)/1.25)-.0125)*.2)+0*X
    return(trupry1)}
  
  if(r==38){
    trupry1=.6+(((((Z-1.125)^2)/-1.25)+1.0125)*.3)+0*X
    return(trupry1)}
  
  if(r==39){
    trupry1=.7+(((((Z-1.75)^2)/-2.5)+1.225)*.1)+0*X
    return(trupry1)}
  
  if(r==32){
    trupry1=(.7+.2)-(((((Z+.75)^2)/2.5)-.225)*.2)+0*X
    return(trupry1)}
  
  if(r==155){
    trupry1=(.8+.1)-(((((Z+.125)^2)/1.25)-.0125)*.1)+0*X
    return(trupry1)}
  
}


######################
# First Difference Calculation Functions
######################

fd.true<-function(dgp){
  
  x1.1<-c(rep(1, 21), rep(.75, 21), seq(from=0,to=1,by=0.05), seq(from=0,to=1,by=0.05))
  x2.1<-c(seq(from=0,to=1,by=0.05), seq(from=0,to=1,by=0.05), rep(1, 21), rep(.75, 21))
  
  x1.2<-c(rep(0, 21), rep(.25, 21), seq(from=0,to=1,by=0.05), seq(from=0,to=1,by=0.05))
  x2.2<-c(seq(from=0,to=1,by=0.05), seq(from=0,to=1,by=0.05), rep(0, 21), rep(.25, 21))
  
  return(trupry(dgp,x1.1,x2.1)-trupry(dgp,x1.2,x2.2))
  
}


fd<-function(beta, form, link.name){
  
  x1.1<-c(rep(1, 21), rep(.75, 21), seq(from=0,to=1,by=0.05), seq(from=0,to=1,by=0.05))
  x2.1<-c(seq(from=0,to=1,by=0.05), seq(from=0,to=1,by=0.05), rep(1, 21), rep(.75, 21))
  
  x1.2<-c(rep(0, 21), rep(.25, 21), seq(from=0,to=1,by=0.05), seq(from=0,to=1,by=0.05))
  x2.2<-c(seq(from=0,to=1,by=0.05), seq(from=0,to=1,by=0.05), rep(0, 21), rep(.25, 21))
  
  predict.dat.1<-as.data.frame(cbind(x1.1,x2.1))
  names(predict.dat.1)<-c("x1","x2")
  predict.dat.2<-as.data.frame(cbind(x1.2,x2.2))
  names(predict.dat.2)<-c("x1","x2")
  
  mm1<-model.matrix(form,predict.dat.1)
  mm2<-model.matrix(form,predict.dat.2)
  
  if(link.name=="regression" | link.name == "loglog" | link.name == "logc"){
  
    if(link.name=="regression"){
      link.fun<-gaussian(link = "identity")$linkinv
    }
  
    if(link.name=="loglog"){
      link.fun<-function(theta){exp(-exp(-theta))}
    }
    
    if(link.name=="logc"){
      link.fun<-function(theta){logc(theta, bvalue=1-1e-5, inverse=T)}
    }
  
  }else{
    link.fun<-binomial(link = link.name)$linkinv
  }
  
  
  
  xb1<-mm1%*%beta
  pry1<-link.fun(xb1)
  
  xb2<-mm2%*%beta
  pry2<-link.fun(xb2)
  
  return(as.vector(pry1-pry2))
  
}



central.fd<-function(beta, form, link.name){
  
  # This is the same as the first difference function above,
  # but focusing only on the calculation of first differences in 
  # probability at central locations in the data
  
  x1.1<-c(1,0.75,0.5,0.5)
  x2.1<-c(0.5,0.5,1,0.75)
  
  x1.2<-c(0,0.25,0.5,0.5)
  x2.2<-c(0.5,0.5,0,0.25)
  
  predict.dat.1<-as.data.frame(cbind(x1.1,x2.1))
  names(predict.dat.1)<-c("x1","x2")
  predict.dat.2<-as.data.frame(cbind(x1.2,x2.2))
  names(predict.dat.2)<-c("x1","x2")
  
  mm1<-model.matrix(form,predict.dat.1)
  mm2<-model.matrix(form,predict.dat.2)
  
  if(link.name=="regression" | link.name == "loglog" | link.name == "logc"){
    
    if(link.name=="regression"){
      link.fun<-gaussian(link = "identity")$linkinv
    }
    
    if(link.name=="loglog"){
      link.fun<-function(theta){exp(-exp(-theta))}
    }
    
    if(link.name=="logc"){
      link.fun<-function(theta){logc(theta, bvalue=1-1e-5, inverse=T)}
    }
    
  }else{
    link.fun<-binomial(link = link.name)$linkinv
  }
  
  xb1<-mm1%*%beta
  pry1<-link.fun(xb1)
  
  xb2<-mm2%*%beta
  pry2<-link.fun(xb2)
  
  return(as.vector(pry1-pry2))
  
}

central.fd.true<-function(dgp){
  
  # This is the same as the fd.true function above,
  # but focusing only on the calculation of first differences in 
  # probability at central locations in the data
  
  x1.1<-c(1,0.75,0.5,0.5)
  x2.1<-c(0.5,0.5,1,0.75)
  
  x1.2<-c(0,0.25,0.5,0.5)
  x2.2<-c(0.5,0.5,0,0.25)
  
  return(trupry(dgp,x1.1,x2.1)-trupry(dgp,x1.2,x2.2))
  
}



#####################
# Marginal Effect Calculation Function
#####################

me<-function(X, beta, dir, form, link.name){
  
  x1<-X[,1]
  x2<-X[,2]
  
  if(dir=="x1"){
    x1.1<-x1+0.01 
    x1.2<-x1-0.01
    x2.1<-x2
    x2.2<-x2
    
    predict.dat.1<-as.data.frame(cbind(x1.1,x2.1))
    names(predict.dat.1)<-c("x1","x2")
    predict.dat.2<-as.data.frame(cbind(x1.2,x2.2))
    names(predict.dat.2)<-c("x1","x2")
    
    mm1<-model.matrix(form,predict.dat.1)
    mm2<-model.matrix(form,predict.dat.2)
    
    if(link.name=="regression" | link.name == "loglog" | link.name == "logc"){
      
      if(link.name=="regression"){
        link.fun<-gaussian(link = "identity")$linkinv
      }
      
      if(link.name=="loglog"){
        link.fun<-function(theta){exp(-exp(-theta))}
      }
      
      if(link.name=="logc"){
        link.fun<-function(theta){logc(theta, bvalue=1-1e-5, inverse=T)}
      }
      
    }else{
      link.fun<-binomial(link = link.name)$linkinv
    }
    
    xb1<-mm1%*%beta
    pry1<-link.fun(xb1)
    
    xb2<-mm2%*%beta
    pry2<-link.fun(xb2)
    
    return(as.vector((pry1-pry2)/0.02))
    
  }
  
  if(dir=="x2"){
    x1.1<-x1
    x1.2<-x1
    x2.1<-x2+0.01
    x2.2<-x2-0.01
    
    predict.dat.1<-as.data.frame(cbind(x1.1,x2.1))
    names(predict.dat.1)<-c("x1","x2")
    predict.dat.2<-as.data.frame(cbind(x1.2,x2.2))
    names(predict.dat.2)<-c("x1","x2")
    
    mm1<-model.matrix(form,predict.dat.1)
    mm2<-model.matrix(form,predict.dat.2)
    
    if(link.name=="regression" | link.name == "loglog" | link.name == "logc"){
      
      if(link.name=="regression"){
        link.fun<-gaussian(link = "identity")$linkinv
      }
      
      if(link.name=="loglog"){
        link.fun<-function(theta){exp(-exp(-theta))}
      }
      
      if(link.name=="logc"){
        link.fun<-function(theta){logc(theta, bvalue=1-1e-5, inverse=T)}
      }
      
    }else{
      link.fun<-binomial(link = link.name)$linkinv
    }
    
    xb1<-mm1%*%beta
    pry1<-link.fun(xb1)
    
    xb2<-mm2%*%beta
    pry2<-link.fun(xb2)
    
    return(as.vector((pry1-pry2)/0.02))
    
  }
  
}

clusterExport(cl, list="me")

me.shell.fun<-function(beta, form, link.name){
  return(c(me(cbind(0.5,0.5), beta, "x1", form, link.name), me(cbind(0.5,0.5), beta, "x2", form, link.name)))
}

me.dif<-function(beta, form, link.name){
  
  x1.1<-c(.5,.5)
  x2.1<-c(1,.75)
  X.1<-cbind(x1.1,x2.1)
  
  x1.2<-c(.5,.5)
  x2.2<-c(0,.25)
  X.2<-cbind(x1.2,x2.2)
  
  me.1.1<-me(X.1, beta=beta, dir="x1", form, link.name)
  me.2.1<-me(X.2, beta=beta, dir="x1", form, link.name)
  
  x1.1<-c(1,.75)
  x2.1<-c(.5,.5)
  X.1<-cbind(x1.1,x2.1)
  
  x1.2<-c(0,.25)
  x2.2<-c(.5,.5)
  X.2<-cbind(x1.2,x2.2)
  
  me.1.2<-me(X.1, beta=beta, dir="x2", form, link.name)
  me.2.2<-me(X.2, beta=beta, dir="x2", form, link.name)
  
  me.1<-c(me.1.1, me.1.2)
  me.2<-c(me.2.1, me.2.2)
  
  return(me.1-me.2)
  
}


me.true<-function(X, r, dir){
  
  x1<-X[,1]
  x2<-X[,2]
  
  if(dir=="x1"){
    
    x1.1<-x1+0.01 
    x1.2<-x1-0.01
    x2.1<-x2
    x2.2<-x2
    
    return(as.vector((trupry(r,x1.1,x2.1)-trupry(r,x1.2,x2.2))/0.02))
    
  }
  
  if(dir=="x2"){
    
    x1.1<-x1
    x1.2<-x1
    x2.1<-x2+0.01
    x2.2<-x2-0.01
    
    return(as.vector((trupry(r,x1.1,x2.1)-trupry(r,x1.2,x2.2))/0.02))
    
  }
  
}


me.dif.true<-function(r){
  
  x1.1<-c(.5,.5)
  x2.1<-c(1,.75)
  X.1<-cbind(x1.1,x2.1)
  
  x1.2<-c(.5,.5)
  x2.2<-c(0,.25)
  X.2<-cbind(x1.2,x2.2)
  
  me.1.1<-me.true(X.1, r, dir="x1")
  me.2.1<-me.true(X.2, r, dir="x1")
  
  x1.1<-c(1,.75)
  x2.1<-c(.5,.5)
  X.1<-cbind(x1.1,x2.1)
  
  x1.2<-c(0,.25)
  x2.2<-c(.5,.5)
  X.2<-cbind(x1.2,x2.2)
  
  me.1.2<-me.true(X.1, r, dir="x2")
  me.2.2<-me.true(X.2, r, dir="x2")
  
  me.1<-c(me.1.1, me.1.2)
  me.2<-c(me.2.1, me.2.2)
  
  return(me.1-me.2)
  
}

second.dif<-function(beta, form, link.name){
  
  x1.1<-c(1,0,1,0)
  x2.1<-c(1,1,0,0)
  
  x1.2<-c(.75,.25,.75,.25)
  x2.2<-c(.75,.75,.25,.25)
  
  predict.dat.1<-as.data.frame(cbind(x1.1,x2.1))
  names(predict.dat.1)<-c("x1","x2")
  
  predict.dat.2<-as.data.frame(cbind(x1.2,x2.2))
  names(predict.dat.2)<-c("x1","x2")
  
  
  mm1<-model.matrix(form,predict.dat.1)
  mm2<-model.matrix(form,predict.dat.2)
  
  
  if(link.name=="regression" | link.name == "loglog" | link.name == "logc"){
    
    if(link.name=="regression"){
      link.fun<-gaussian(link = "identity")$linkinv
    }
    
    if(link.name=="loglog"){
      link.fun<-function(theta){exp(-exp(-theta))}
    }
    
    if(link.name=="logc"){
      link.fun<-function(theta){logc(theta, bvalue=1-1e-5, inverse=T)}
    }
    
  }else{
    link.fun<-binomial(link = link.name)$linkinv
  }
  
  xb1<-mm1%*%beta
  pry1<-link.fun(xb1)
  
  xb2<-mm2%*%beta
  pry2<-link.fun(xb2)
  
  
  return(c((pry1[1]-pry1[2])-(pry1[3]-pry1[4]), (pry2[1]-pry2[2])-(pry2[3]-pry2[4])))
  
}



second.dif.true<-function(r){
  
  x1.1<-c(1,0,1,0)
  x2.1<-c(1,1,0,0)
  
  x1.2<-c(.75,.25,.75,.25)
  x2.2<-c(.75,.75,.25,.25)
  
  pry1<-trupry(r,x1.1,x2.1)
  pry2<-trupry(r,x1.2,x2.2)
  
  return(c((pry1[1]-pry1[2])-(pry1[3]-pry1[4]), (pry2[1]-pry2[2])-(pry2[3]-pry2[4])))
  
}



#####################
# Estimate models on data sets
#####################

# Prepare filenames
filenames<-paste("sim0", 1:9, ".dta", sep = "")
filenames<-c(filenames,paste("sim", 10:155, ".dta", sep = ""))

for(r in 1:155){ 
  
  read.dta(filenames[r])->simdatmain
  
  fileConn<-file("A-current-iteration.txt")
  writeLines(c("Current Data Set:", r), fileConn)
  close(fileConn)
  
  # Progress indicator
  print(c("DATA SET: ",r),quote=F)
  pb<-txtProgressBar(min=0, max=reps, initial=0, style=3)
  
  for(i in 1:reps){
    
    setTxtProgressBar(pb, value=i)
    
    model.name.vector<-c()
    model.formula.vector<-c()
    model.link.vector<-c()
    
    # Subset datasets
    simdat<-simdatmain[(1+1000*(i-1)):(1000*i),]
    write.dta(simdat, file="simdat_temp.dta")
    
    # run a bunch of models!
    
    ######
    # logit model, with NO product term
    ######
    model.name<-"logit"
    
    line1<-paste("cd \"", normalizePath(getwd()), "\"", sep="")
    line2<-"use simdat_temp.dta"
    line3<-"logit y x1 x2, iter(100)"
    line3.1<-"mat converged = e(converged)"
    line3.2<-paste("mat2txt, matrix(converged) saving(converge_", model.name, ".txt) replace", sep="")    
    line4<-"mat beta = e(b)\'"
    line5<-paste("mat2txt, matrix(beta) saving(beta_", model.name, ".txt) replace", sep="")
    line6<-"mat vcv = e(V)"
    line7<-paste("mat2txt, matrix(vcv) saving(vcv_", model.name, ".txt) replace", sep="")
    line8<-"estat ic"
    line9<-"mat ic = r(S)\'"
    line10<-paste("mat2txt, matrix(ic) saving(ic_", model.name, ".txt) replace", sep="")
    line11<-"predict pred, pr"
    line12<-""
    line13<-"hl y pred, noroc notable"
    line14<-"mat hl = r(p)"
    line15<-paste("mat2txt, matrix(hl) saving(hl_", model.name, ".txt) replace", sep="")
    line16<-"roctab y pred"
    line17<-"mat roc = r(area)"
    line18<-paste("mat2txt, matrix(roc) saving(roc_", model.name, ".txt) replace", sep="")
    line19<-"clear all"
    line20<-""
    
    
    # save the list of commands to an object
    model.1<-vapply(paste("line",c(1:3, 3.1, 3.2, 4:20), sep=""), FUN=get, FUN.VALUE="")
    
    # remember this model's name, formula, and link function
    model.name.vector<-c(model.name.vector, model.name)
    model.formula.vector<-c(model.formula.vector, formula(~x1+x2))
    model.link.vector<-c(model.link.vector, "logit")
    
    ######
    # logit model, with product term
    ######
    model.name<-"logit_product"
    
    # write the relevant commands for a stata do file
    line1<-paste("cd \"", normalizePath(getwd()), "\"", sep="")
    line2<-"use simdat_temp.dta"
    line3<-"logit y x1 x2 x1x2, iter(100)"
    line3.1<-"mat converged = e(converged)"
    line3.2<-paste("mat2txt, matrix(converged) saving(converge_", model.name, ".txt) replace", sep="")    
    line4<-"mat beta = e(b)\'"
    line5<-paste("mat2txt, matrix(beta) saving(beta_", model.name, ".txt) replace", sep="")
    line6<-"mat vcv = e(V)"
    line7<-paste("mat2txt, matrix(vcv) saving(vcv_", model.name, ".txt) replace", sep="")
    line8<-"estat ic"
    line9<-"mat ic = r(S)\'"
    line10<-paste("mat2txt, matrix(ic) saving(ic_", model.name, ".txt) replace", sep="")
    line11<-"predict pred, pr"
    line12<-""
    line13<-"hl y pred, noroc notable"
    line14<-"mat hl = r(p)"
    line15<-paste("mat2txt, matrix(hl) saving(hl_", model.name, ".txt) replace", sep="")
    line16<-"roctab y pred"
    line17<-"mat roc = r(area)"
    line18<-paste("mat2txt, matrix(roc) saving(roc_", model.name, ".txt) replace", sep="")
    line19<-"clear all"
    line20<-""
    
    
    # save the list of commands to an object
    model.2<-vapply(paste("line",c(1:3, 3.1, 3.2, 4:20), sep=""), FUN=get, FUN.VALUE="")
    
    # remember this model's name, formula, and link function
    model.name.vector<-c(model.name.vector, model.name)
    model.formula.vector<-c(model.formula.vector, formula(~x1*x2))
    model.link.vector<-c(model.link.vector, "logit")
    
    
    ######
    # cloglog, with NO product term
    ######
    model.name<-"cloglog"
    
    line1<-paste("cd \"", normalizePath(getwd()), "\"", sep="")
    line2<-"use simdat_temp.dta"
    line3<-"glm y x1 x2, family(binomial 1) link(cloglog) iter(100)"
    line3.1<-"mat converged = e(converged)"
    line3.2<-paste("mat2txt, matrix(converged) saving(converge_", model.name, ".txt) replace", sep="")    
    line4<-"mat beta = e(b)\'"
    line5<-paste("mat2txt, matrix(beta) saving(beta_", model.name, ".txt) replace", sep="")
    line6<-"mat vcv = e(V)"
    line7<-paste("mat2txt, matrix(vcv) saving(vcv_", model.name, ".txt) replace", sep="")
    line8<-"estat ic"
    line9<-"mat ic = r(S)\'"
    line10<-paste("mat2txt, matrix(ic) saving(ic_", model.name, ".txt) replace", sep="")
    line11<-"predict pred_t, mu"
    line12<-"gen pred = min(max(pred_t,0.01),0.99)"
    line13<-"hl y pred, noroc notable"
    line14<-"mat hl = r(p)"
    line15<-paste("mat2txt, matrix(hl) saving(hl_", model.name, ".txt) replace", sep="")
    line16<-"roctab y pred"
    line17<-"mat roc = r(area)"
    line18<-paste("mat2txt, matrix(roc) saving(roc_", model.name, ".txt) replace", sep="")
    line19<-"clear all"
    line20<-""

    
    
    
    # save the list of commands to an object
    model.3<-vapply(paste("line",c(1:3, 3.1, 3.2, 4:20), sep=""), FUN=get, FUN.VALUE="")
    
    # remember this model's name, formula, and link function
    model.name.vector<-c(model.name.vector, model.name)
    model.formula.vector<-c(model.formula.vector, formula(~x1+x2))
    model.link.vector<-c(model.link.vector, "cloglog")
    
    ######
    # cloglog, with product term
    ######
    model.name<-"cloglog_product"
    
    # write the relevant commands for a stata do file
    line1<-paste("cd \"", normalizePath(getwd()), "\"", sep="")
    line2<-"use simdat_temp.dta"
    line3<-"glm y x1 x2 x1x2, family(binomial 1) link(cloglog) iter(100)"
    line3.1<-"mat converged = e(converged)"
    line3.2<-paste("mat2txt, matrix(converged) saving(converge_", model.name, ".txt) replace", sep="")    
    line4<-"mat beta = e(b)\'"
    line5<-paste("mat2txt, matrix(beta) saving(beta_", model.name, ".txt) replace", sep="")
    line6<-"mat vcv = e(V)"
    line7<-paste("mat2txt, matrix(vcv) saving(vcv_", model.name, ".txt) replace", sep="")
    line8<-"estat ic"
    line9<-"mat ic = r(S)\'"
    line10<-paste("mat2txt, matrix(ic) saving(ic_", model.name, ".txt) replace", sep="")
    line11<-"predict pred_t, mu"
    line12<-"gen pred = min(max(pred_t,0.01),0.99)"
    line13<-"hl y pred, noroc notable"
    line14<-"mat hl = r(p)"
    line15<-paste("mat2txt, matrix(hl) saving(hl_", model.name, ".txt) replace", sep="")
    line16<-"roctab y pred"
    line17<-"mat roc = r(area)"
    line18<-paste("mat2txt, matrix(roc) saving(roc_", model.name, ".txt) replace", sep="")
    line19<-"clear all"
    line20<-""
    
    
    
    # save the list of commands to an object
    model.4<-vapply(paste("line",c(1:3, 3.1, 3.2, 4:20), sep=""), FUN=get, FUN.VALUE="")
    
    # remember this model's name, formula, and link function
    model.name.vector<-c(model.name.vector, model.name)
    model.formula.vector<-c(model.formula.vector, formula(~x1*x2))
    model.link.vector<-c(model.link.vector, "cloglog")
    
    
    
    
    
    
    ######
    # log, with NO product term
    ######
    model.name<-"log"
    
    line1<-paste("cd \"", normalizePath(getwd()), "\"", sep="")
    line2<-"use simdat_temp.dta"
    line3<-"glm y x1 x2, family(binomial 1) link(log) iter(100)"
    line3.1<-"mat converged = e(converged)"
    line3.2<-paste("mat2txt, matrix(converged) saving(converge_", model.name, ".txt) replace", sep="")    
    line4<-"mat beta = e(b)\'"
    line5<-paste("mat2txt, matrix(beta) saving(beta_", model.name, ".txt) replace", sep="")
    line6<-"mat vcv = e(V)"
    line7<-paste("mat2txt, matrix(vcv) saving(vcv_", model.name, ".txt) replace", sep="")
    line8<-"estat ic"
    line9<-"mat ic = r(S)\'"
    line10<-paste("mat2txt, matrix(ic) saving(ic_", model.name, ".txt) replace", sep="")
    line11<-"predict pred_t, mu"
    line12<-"gen pred = min(max(pred_t,0.01),0.99)"
    line13<-"hl y pred, noroc notable"
    line14<-"mat hl = r(p)"
    line15<-paste("mat2txt, matrix(hl) saving(hl_", model.name, ".txt) replace", sep="")
    line16<-"roctab y pred"
    line17<-"mat roc = r(area)"
    line18<-paste("mat2txt, matrix(roc) saving(roc_", model.name, ".txt) replace", sep="")
    line19<-"clear all"
    line20<-""
    
    
    
    # save the list of commands to an object
    model.5<-vapply(paste("line",c(1:3, 3.1, 3.2, 4:20), sep=""), FUN=get, FUN.VALUE="")
    
    # remember this model's name, formula, and link function
    model.name.vector<-c(model.name.vector, model.name)
    model.formula.vector<-c(model.formula.vector, formula(~x1+x2))
    model.link.vector<-c(model.link.vector, "log")
    
    ######
    # log, with product term
    ######
    model.name<-"log_product"
    
    # write the relevant commands for a stata do file
    line1<-paste("cd \"", normalizePath(getwd()), "\"", sep="")
    line2<-"use simdat_temp.dta"
    line3<-"glm y x1 x2 x1x2, family(binomial 1) link(log) iter(100)"
    line3.1<-"mat converged = e(converged)"
    line3.2<-paste("mat2txt, matrix(converged) saving(converge_", model.name, ".txt) replace", sep="")    
    line4<-"mat beta = e(b)\'"
    line5<-paste("mat2txt, matrix(beta) saving(beta_", model.name, ".txt) replace", sep="")
    line6<-"mat vcv = e(V)"
    line7<-paste("mat2txt, matrix(vcv) saving(vcv_", model.name, ".txt) replace", sep="")
    line8<-"estat ic"
    line9<-"mat ic = r(S)\'"
    line10<-paste("mat2txt, matrix(ic) saving(ic_", model.name, ".txt) replace", sep="")
    line11<-"predict pred_t, mu"
    line12<-"gen pred = min(max(pred_t,0.01),0.99)"
    line13<-"hl y pred, noroc notable"
    line14<-"mat hl = r(p)"
    line15<-paste("mat2txt, matrix(hl) saving(hl_", model.name, ".txt) replace", sep="")
    line16<-"roctab y pred"
    line17<-"mat roc = r(area)"
    line18<-paste("mat2txt, matrix(roc) saving(roc_", model.name, ".txt) replace", sep="")
    line19<-"clear all"
    line20<-""
    
    
    
    # save the list of commands to an object
    model.6<-vapply(paste("line",c(1:3, 3.1, 3.2, 4:20), sep=""), FUN=get, FUN.VALUE="")
    
    # remember this model's name, formula, and link function
    model.name.vector<-c(model.name.vector, model.name)
    model.formula.vector<-c(model.formula.vector, formula(~x1*x2))
    model.link.vector<-c(model.link.vector, "log")
    
    
    
    
    
    
    ######
    # probit, with NO product term
    ######
    model.name<-"probit"
    
    line1<-paste("cd \"", normalizePath(getwd()), "\"", sep="")
    line2<-"use simdat_temp.dta"
    line3.1<-"mat converged = e(converged)"
    line3.2<-paste("mat2txt, matrix(converged) saving(converge_", model.name, ".txt) replace", sep="")    
    line3<-"probit y x1 x2, iter(100)"
    line4<-"mat beta = e(b)\'"
    line5<-paste("mat2txt, matrix(beta) saving(beta_", model.name, ".txt) replace", sep="")
    line6<-"mat vcv = e(V)"
    line7<-paste("mat2txt, matrix(vcv) saving(vcv_", model.name, ".txt) replace", sep="")
    line8<-"estat ic"
    line9<-"mat ic = r(S)\'"
    line10<-paste("mat2txt, matrix(ic) saving(ic_", model.name, ".txt) replace", sep="")
    line11<-"predict pred, pr"
    line12<-""
    line13<-"hl y pred, noroc notable"
    line14<-"mat hl = r(p)"
    line15<-paste("mat2txt, matrix(hl) saving(hl_", model.name, ".txt) replace", sep="")
    line16<-"roctab y pred"
    line17<-"mat roc = r(area)"
    line18<-paste("mat2txt, matrix(roc) saving(roc_", model.name, ".txt) replace", sep="")
    line19<-"clear all"
    line20<-""
    
    
    
    # save the list of commands to an object
    model.7<-vapply(paste("line",c(1:3, 3.1, 3.2, 4:20), sep=""), FUN=get, FUN.VALUE="")
    
    # remember this model's name, formula, and link function
    model.name.vector<-c(model.name.vector, model.name)
    model.formula.vector<-c(model.formula.vector, formula(~x1+x2))
    model.link.vector<-c(model.link.vector, "probit")
    
    ######
    # probit, with product term
    ######
    model.name<-"probit_product"
    
    # write the relevant commands for a stata do file
    line1<-paste("cd \"", normalizePath(getwd()), "\"", sep="")
    line2<-"use simdat_temp.dta"
    line3.1<-"mat converged = e(converged)"
    line3.2<-paste("mat2txt, matrix(converged) saving(converge_", model.name, ".txt) replace", sep="")    
    line3<-"probit y x1 x2 x1x2, iter(100)"
    line4<-"mat beta = e(b)\'"
    line5<-paste("mat2txt, matrix(beta) saving(beta_", model.name, ".txt) replace", sep="")
    line6<-"mat vcv = e(V)"
    line7<-paste("mat2txt, matrix(vcv) saving(vcv_", model.name, ".txt) replace", sep="")
    line8<-"estat ic"
    line9<-"mat ic = r(S)\'"
    line10<-paste("mat2txt, matrix(ic) saving(ic_", model.name, ".txt) replace", sep="")
    line11<-"predict pred, pr"
    line12<-""
    line13<-"hl y pred, noroc notable"
    line14<-"mat hl = r(p)"
    line15<-paste("mat2txt, matrix(hl) saving(hl_", model.name, ".txt) replace", sep="")
    line16<-"roctab y pred"
    line17<-"mat roc = r(area)"
    line18<-paste("mat2txt, matrix(roc) saving(roc_", model.name, ".txt) replace", sep="")
    line19<-"clear all"
    line20<-""
    
    
    
    # save the list of commands to an object
    model.8<-vapply(paste("line",c(1:3, 3.1, 3.2, 4:20), sep=""), FUN=get, FUN.VALUE="")
    
    # remember this model's name, formula, and link function
    model.name.vector<-c(model.name.vector, model.name)
    model.formula.vector<-c(model.formula.vector, formula(~x1*x2))
    model.link.vector<-c(model.link.vector, "probit")
    
    
    
    
    
    
    
    ######
    # regression, with NO product term
    ######
    model.name<-"regression"
    
    line1<-paste("cd \"", normalizePath(getwd()), "\"", sep="")
    line2<-"use simdat_temp.dta"
    line3<-"glm y x1 x2, family(gaussian) iter(100)"
    line3.1<-"mat converged = e(converged)"
    line3.2<-paste("mat2txt, matrix(converged) saving(converge_", model.name, ".txt) replace", sep="")    
    line4<-"mat beta = e(b)\'"
    line5<-paste("mat2txt, matrix(beta) saving(beta_", model.name, ".txt) replace", sep="")
    line6<-"mat vcv = e(V)"
    line7<-paste("mat2txt, matrix(vcv) saving(vcv_", model.name, ".txt) replace", sep="")
    line8<-"estat ic"
    line9<-"mat ic = r(S)\'"
    line10<-paste("mat2txt, matrix(ic) saving(ic_", model.name, ".txt) replace", sep="")
    line11<-"predict pred_t, xb"
    line12<-"gen pred = min(max(pred_t,0.01),0.99)"
    line13<-"hl y pred, noroc notable"
    line14<-"mat hl = r(p)"
    line15<-paste("mat2txt, matrix(hl) saving(hl_", model.name, ".txt) replace", sep="")
    line16<-"roctab y pred"
    line17<-"mat roc = r(area)"
    line18<-paste("mat2txt, matrix(roc) saving(roc_", model.name, ".txt) replace", sep="")
    line19<-"clear all"
    line20<-""

    
    
    # save the list of commands to an object
    model.9<-vapply(paste("line",c(1:3, 3.1, 3.2, 4:20), sep=""), FUN=get, FUN.VALUE="")
    
    # remember this model's name, formula, and link function
    model.name.vector<-c(model.name.vector, model.name)
    model.formula.vector<-c(model.formula.vector, formula(~x1+x2))
    model.link.vector<-c(model.link.vector, "regression")
    
    ######
    # regression, with product term
    ######
    model.name<-"regression_product"
    
    # write the relevant commands for a stata do file
    line1<-paste("cd \"", normalizePath(getwd()), "\"", sep="")
    line2<-"use simdat_temp.dta"
    line3<-"glm y x1 x2 x1x2, family(gaussian) iter(100)"
    line3.1<-"mat converged = e(converged)"
    line3.2<-paste("mat2txt, matrix(converged) saving(converge_", model.name, ".txt) replace", sep="")    
    line4<-"mat beta = e(b)\'"
    line5<-paste("mat2txt, matrix(beta) saving(beta_", model.name, ".txt) replace", sep="")
    line6<-"mat vcv = e(V)"
    line7<-paste("mat2txt, matrix(vcv) saving(vcv_", model.name, ".txt) replace", sep="")
    line8<-"estat ic"
    line9<-"mat ic = r(S)\'"
    line10<-paste("mat2txt, matrix(ic) saving(ic_", model.name, ".txt) replace", sep="")
    line11<-"predict pred_t, xb"
    line12<-"gen pred = min(max(pred_t,0.01),0.99)"
    line13<-"hl y pred, noroc notable"
    line14<-"mat hl = r(p)"
    line15<-paste("mat2txt, matrix(hl) saving(hl_", model.name, ".txt) replace", sep="")
    line16<-"roctab y pred"
    line17<-"mat roc = r(area)"
    line18<-paste("mat2txt, matrix(roc) saving(roc_", model.name, ".txt) replace", sep="")
    line19<-"clear all"
    line20<-""

    
    # save the list of commands to an object
    model.10<-vapply(paste("line",c(1:3, 3.1, 3.2, 4:20), sep=""), FUN=get, FUN.VALUE="")
    
    # remember this model's name, formula, and link function
    model.name.vector<-c(model.name.vector, model.name)
    model.formula.vector<-c(model.formula.vector, formula(~x1*x2))
    model.link.vector<-c(model.link.vector, "regression")
    
    
    
    
    
    
    ######
    # loglog model, with NO product term
    ######
    model.name<-"loglog"
    
    line1<-paste("cd \"", normalizePath(getwd()), "\"", sep="")
    line2<-"use simdat_temp.dta"
    line3<-"glm y x1 x2, family(binomial 1) link(cloglog) iter(100)"
    line3.1<-"mat converged = e(converged)"
    line3.2<-paste("mat2txt, matrix(converged) saving(converge_", model.name, ".txt) replace", sep="")    
    line4<-"mat beta = e(b)\'"
    line5<-paste("mat2txt, matrix(beta) saving(beta_", model.name, ".txt) replace", sep="")
    line6<-"mat vcv = e(V)"
    line7<-paste("mat2txt, matrix(vcv) saving(vcv_", model.name, ".txt) replace", sep="")
    line8<-"estat ic"
    line9<-"mat ic = r(S)\'"
    line10<-paste("mat2txt, matrix(ic) saving(ic_", model.name, ".txt) replace", sep="")
    line11<-"predict pred_t, xb" #error!
    line12<-"gen pred = min(max(pred_t,0.01),0.99)"
    line13<-"hl y pred, noroc notable"
    line14<-"mat hl = r(p)"
    line15<-paste("mat2txt, matrix(hl) saving(hl_", model.name, ".txt) replace", sep="")
    line16<-"roctab y pred"
    line17<-"mat roc = r(area)"
    line18<-paste("mat2txt, matrix(roc) saving(roc_", model.name, ".txt) replace", sep="")
    line19<-"clear all"
    line20<-""
    
    
    
    # save the list of commands to an object
    model.11<-vapply(paste("line",c(1:3, 3.1, 3.2, 4:20), sep=""), FUN=get, FUN.VALUE="")
    
    # remember this model's name, formula, and link function
    model.name.vector<-c(model.name.vector, model.name)
    model.formula.vector<-c(model.formula.vector, formula(~x1+x2))
    model.link.vector<-c(model.link.vector, "loglog")
    
    ######
    # loglog, with product term
    ######
    model.name<-"loglog_product"
    
    # write the relevant commands for a stata do file
    line1<-paste("cd \"", normalizePath(getwd()), "\"", sep="")
    line2<-"use simdat_temp.dta"
    line3<-"glm y x1 x2 x1x2, family(binomial 1) link(cloglog) iter(100)"
    line3.1<-"mat converged = e(converged)"
    line3.2<-paste("mat2txt, matrix(converged) saving(converge_", model.name, ".txt) replace", sep="")    
    line4<-"mat beta = e(b)\'"
    line5<-paste("mat2txt, matrix(beta) saving(beta_", model.name, ".txt) replace", sep="")
    line6<-"mat vcv = e(V)"
    line7<-paste("mat2txt, matrix(vcv) saving(vcv_", model.name, ".txt) replace", sep="")
    line8<-"estat ic"
    line9<-"mat ic = r(S)\'"
    line10<-paste("mat2txt, matrix(ic) saving(ic_", model.name, ".txt) replace", sep="")
    line11<-"predict pred_t, xb" #error!
    line12<-"gen pred = min(max(pred_t,0.01),0.99)"
    line13<-"hl y pred, noroc notable"
    line14<-"mat hl = r(p)"
    line15<-paste("mat2txt, matrix(hl) saving(hl_", model.name, ".txt) replace", sep="")
    line16<-"roctab y pred"
    line17<-"mat roc = r(area)"
    line18<-paste("mat2txt, matrix(roc) saving(roc_", model.name, ".txt) replace", sep="")
    line19<-"clear all"
    line20<-""
    
    
    # save the list of commands to an object
    model.12<-vapply(paste("line",c(1:3, 3.1, 3.2, 4:20), sep=""), FUN=get, FUN.VALUE="")
    
    # remember this model's name, formula, and link function
    model.name.vector<-c(model.name.vector, model.name)
    model.formula.vector<-c(model.formula.vector, formula(~x1*x2))
    model.link.vector<-c(model.link.vector, "loglog")
    
    
    ######
    # logc model, with NO product term
    ######
    model.name<-"logc"
    
    line1<-paste("cd \"", normalizePath(getwd()), "\"", sep="")
    line2<-"use simdat_temp.dta"
    line3<-"glm y x1 x2, family(binomial 1) link(logc) iter(100)"
    line3.1<-"mat converged = e(converged)"
    line3.2<-paste("mat2txt, matrix(converged) saving(converge_", model.name, ".txt) replace", sep="")    
    line4<-"mat beta = e(b)\'"
    line5<-paste("mat2txt, matrix(beta) saving(beta_", model.name, ".txt) replace", sep="")
    line6<-"mat vcv = e(V)"
    line7<-paste("mat2txt, matrix(vcv) saving(vcv_", model.name, ".txt) replace", sep="")
    line8<-"estat ic"
    line9<-"mat ic = r(S)\'"
    line10<-paste("mat2txt, matrix(ic) saving(ic_", model.name, ".txt) replace", sep="")
    line11<-"predict pred_t, xb" #error!
    line12<-"gen pred = min(max(pred_t,0.01),0.99)"
    line13<-"hl y pred, noroc notable"
    line14<-"mat hl = r(p)"
    line15<-paste("mat2txt, matrix(hl) saving(hl_", model.name, ".txt) replace", sep="")
    line16<-"roctab y pred"
    line17<-"mat roc = r(area)"
    line18<-paste("mat2txt, matrix(roc) saving(roc_", model.name, ".txt) replace", sep="")
    line19<-"clear all"
    line20<-""
    
    
    
    # save the list of commands to an object
    model.13<-vapply(paste("line",c(1:3, 3.1, 3.2, 4:20), sep=""), FUN=get, FUN.VALUE="")
    
    # remember this model's name, formula, and link function
    model.name.vector<-c(model.name.vector, model.name)
    model.formula.vector<-c(model.formula.vector, formula(~x1+x2))
    model.link.vector<-c(model.link.vector, "logc")
    
    ######
    # logc, with product term
    ######
    model.name<-"logc_product"
    
    # write the relevant commands for a stata do file
    line1<-paste("cd \"", normalizePath(getwd()), "\"", sep="")
    line2<-"use simdat_temp.dta"
    line3<-"glm y x1 x2 x1x2, family(binomial 1) link(logc) iter(100)"
    line3.1<-"mat converged = e(converged)"
    line3.2<-paste("mat2txt, matrix(converged) saving(converge_", model.name, ".txt) replace", sep="")    
    line4<-"mat beta = e(b)\'"
    line5<-paste("mat2txt, matrix(beta) saving(beta_", model.name, ".txt) replace", sep="")
    line6<-"mat vcv = e(V)"
    line7<-paste("mat2txt, matrix(vcv) saving(vcv_", model.name, ".txt) replace", sep="")
    line8<-"estat ic"
    line9<-"mat ic = r(S)\'"
    line10<-paste("mat2txt, matrix(ic) saving(ic_", model.name, ".txt) replace", sep="")
    line11<-"predict pred_t, xb" #error!
    line12<-"gen pred = min(max(pred_t,0.01),0.99)"
    line13<-"hl y pred, noroc notable"
    line14<-"mat hl = r(p)"
    line15<-paste("mat2txt, matrix(hl) saving(hl_", model.name, ".txt) replace", sep="")
    line16<-"roctab y pred"
    line17<-"mat roc = r(area)"
    line18<-paste("mat2txt, matrix(roc) saving(roc_", model.name, ".txt) replace", sep="")
    line19<-"clear all"
    line20<-""
    
    
    # save the list of commands to an object
    model.14<-vapply(paste("line",c(1:3, 3.1, 3.2, 4:20), sep=""), FUN=get, FUN.VALUE="")
    
    # remember this model's name, formula, and link function
    model.name.vector<-c(model.name.vector, model.name)
    model.formula.vector<-c(model.formula.vector, formula(~x1*x2))
    model.link.vector<-c(model.link.vector, "logc")
    
    ######
    # END OF THE SIMPLE MODELS: RECORD THE LENGTH!
    ###### 
    SM<-length(model.name.vector)
    
    
    
    
    
    
    
    
    
    
    
    ######
    # logit model, x1sqx2
    ######
    model.name<-"logit_x1sqx2"
    
    line1<-paste("cd \"", normalizePath(getwd()), "\"", sep="")
    line2<-"use simdat_temp.dta"
    line3<-"logit y x1 x2 x1x2 x1sq x1sqx2, iter(100)"
    line3.1<-"mat converged = e(converged)"
    line3.2<-paste("mat2txt, matrix(converged) saving(converge_", model.name, ".txt) replace", sep="")    
    line4<-"mat beta = e(b)\'"
    line5<-paste("mat2txt, matrix(beta) saving(beta_", model.name, ".txt) replace", sep="")
    line6<-"mat vcv = e(V)"
    line7<-paste("mat2txt, matrix(vcv) saving(vcv_", model.name, ".txt) replace", sep="")
    line8<-"estat ic"
    line9<-"mat ic = r(S)\'"
    line10<-paste("mat2txt, matrix(ic) saving(ic_", model.name, ".txt) replace", sep="")
    line11<-"predict pred, pr"
    line12<-""
    line13<-"hl y pred, noroc notable"
    line14<-"mat hl = r(p)"
    line15<-paste("mat2txt, matrix(hl) saving(hl_", model.name, ".txt) replace", sep="")
    line16<-"roctab y pred"
    line17<-"mat roc = r(area)"
    line18<-paste("mat2txt, matrix(roc) saving(roc_", model.name, ".txt) replace", sep="")
    line19<-"clear all"
    line20<-""
    
    
    # save the list of commands to an object
    model.15<-vapply(paste("line",c(1:3, 3.1, 3.2, 4:20), sep=""), FUN=get, FUN.VALUE="")
    
    # remember this model's name, formula, and link function
    model.name.vector<-c(model.name.vector, model.name)
    model.formula.vector<-c(model.formula.vector, formula(~x1+x2+I(x1*x2)+I(x1^2)+I(x1^2*x2)))
    model.link.vector<-c(model.link.vector, "logit")
    
    
    
    
    ######
    # cloglog, x1sqx2
    ######
    model.name<-"cloglog_x1sqx2"
    
    line1<-paste("cd \"", normalizePath(getwd()), "\"", sep="")
    line2<-"use simdat_temp.dta"
    line3<-"glm y x1 x2 x1x2 x1sq x1sqx2, family(binomial 1) link(cloglog) iter(100)"
    line3.1<-"mat converged = e(converged)"
    line3.2<-paste("mat2txt, matrix(converged) saving(converge_", model.name, ".txt) replace", sep="")    
    line4<-"mat beta = e(b)\'"
    line5<-paste("mat2txt, matrix(beta) saving(beta_", model.name, ".txt) replace", sep="")
    line6<-"mat vcv = e(V)"
    line7<-paste("mat2txt, matrix(vcv) saving(vcv_", model.name, ".txt) replace", sep="")
    line8<-"estat ic"
    line9<-"mat ic = r(S)\'"
    line10<-paste("mat2txt, matrix(ic) saving(ic_", model.name, ".txt) replace", sep="")
    line11<-"predict pred_t, mu"
    line12<-"gen pred = min(max(pred_t,0.01),0.99)"
    line13<-"hl y pred, noroc notable"
    line14<-"mat hl = r(p)"
    line15<-paste("mat2txt, matrix(hl) saving(hl_", model.name, ".txt) replace", sep="")
    line16<-"roctab y pred"
    line17<-"mat roc = r(area)"
    line18<-paste("mat2txt, matrix(roc) saving(roc_", model.name, ".txt) replace", sep="")
    line19<-"clear all"
    line20<-""

    
    
    
    # save the list of commands to an object
    model.16<-vapply(paste("line",c(1:3, 3.1, 3.2, 4:20), sep=""), FUN=get, FUN.VALUE="")
    
    # remember this model's name, formula, and link function
    model.name.vector<-c(model.name.vector, model.name)
    model.formula.vector<-c(model.formula.vector, formula(~x1+x2+I(x1*x2)+I(x1^2)+I(x1^2*x2)))
    model.link.vector<-c(model.link.vector, "cloglog")
    
    
    
    
    ######
    # log, x1sqx2
    ######
    model.name<-"log_x1sqx2"
    
    line1<-paste("cd \"", normalizePath(getwd()), "\"", sep="")
    line2<-"use simdat_temp.dta"
    line3<-"glm y x1 x2 x1x2 x1sq x1sqx2, family(binomial 1) link(log) iter(100)"
    line3.1<-"mat converged = e(converged)"
    line3.2<-paste("mat2txt, matrix(converged) saving(converge_", model.name, ".txt) replace", sep="")    
    line4<-"mat beta = e(b)\'"
    line5<-paste("mat2txt, matrix(beta) saving(beta_", model.name, ".txt) replace", sep="")
    line6<-"mat vcv = e(V)"
    line7<-paste("mat2txt, matrix(vcv) saving(vcv_", model.name, ".txt) replace", sep="")
    line8<-"estat ic"
    line9<-"mat ic = r(S)\'"
    line10<-paste("mat2txt, matrix(ic) saving(ic_", model.name, ".txt) replace", sep="")
    line11<-"predict pred_t, mu"
    line12<-"gen pred = min(max(pred_t,0.01),0.99)"
    line13<-"hl y pred, noroc notable"
    line14<-"mat hl = r(p)"
    line15<-paste("mat2txt, matrix(hl) saving(hl_", model.name, ".txt) replace", sep="")
    line16<-"roctab y pred"
    line17<-"mat roc = r(area)"
    line18<-paste("mat2txt, matrix(roc) saving(roc_", model.name, ".txt) replace", sep="")
    line19<-"clear all"
    line20<-""
    
    
    
    # save the list of commands to an object
    model.17<-vapply(paste("line",c(1:3, 3.1, 3.2, 4:20), sep=""), FUN=get, FUN.VALUE="")
    
    # remember this model's name, formula, and link function
    model.name.vector<-c(model.name.vector, model.name)
    model.formula.vector<-c(model.formula.vector, formula(~x1+x2+I(x1*x2)+I(x1^2)+I(x1^2*x2)))
    model.link.vector<-c(model.link.vector, "log")
    
    
    
    
    
    ######
    # probit, x1sqx2
    ######
    model.name<-"probit_x1sqx2"
    
    line1<-paste("cd \"", normalizePath(getwd()), "\"", sep="")
    line2<-"use simdat_temp.dta"
    line3<-"probit y x1 x2 x1x2 x1sq x1sqx2, iter(100)"
    line3.1<-"mat converged = e(converged)"
    line3.2<-paste("mat2txt, matrix(converged) saving(converge_", model.name, ".txt) replace", sep="")    
    line4<-"mat beta = e(b)\'"
    line5<-paste("mat2txt, matrix(beta) saving(beta_", model.name, ".txt) replace", sep="")
    line6<-"mat vcv = e(V)"
    line7<-paste("mat2txt, matrix(vcv) saving(vcv_", model.name, ".txt) replace", sep="")
    line8<-"estat ic"
    line9<-"mat ic = r(S)\'"
    line10<-paste("mat2txt, matrix(ic) saving(ic_", model.name, ".txt) replace", sep="")
    line11<-"predict pred, pr"
    line12<-""
    line13<-"hl y pred, noroc notable"
    line14<-"mat hl = r(p)"
    line15<-paste("mat2txt, matrix(hl) saving(hl_", model.name, ".txt) replace", sep="")
    line16<-"roctab y pred"
    line17<-"mat roc = r(area)"
    line18<-paste("mat2txt, matrix(roc) saving(roc_", model.name, ".txt) replace", sep="")
    line19<-"clear all"
    line20<-""
    
    
    
    # save the list of commands to an object
    model.18<-vapply(paste("line",c(1:3, 3.1, 3.2, 4:20), sep=""), FUN=get, FUN.VALUE="")
    
    # remember this model's name, formula, and link function
    model.name.vector<-c(model.name.vector, model.name)
    model.formula.vector<-c(model.formula.vector, formula(~x1+x2+I(x1*x2)+I(x1^2)+I(x1^2*x2)))
    model.link.vector<-c(model.link.vector, "probit")
    
    
    
    
    
    
    ######
    # regression, x1sqx2
    ######
    model.name<-"regression_x1sqx2"
    
    line1<-paste("cd \"", normalizePath(getwd()), "\"", sep="")
    line2<-"use simdat_temp.dta"
    line3<-"glm y x1 x2 x1x2 x1sq x1sqx2, family(gaussian) iter(100)"
    line3.1<-"mat converged = e(converged)"
    line3.2<-paste("mat2txt, matrix(converged) saving(converge_", model.name, ".txt) replace", sep="")    
    line4<-"mat beta = e(b)\'"
    line5<-paste("mat2txt, matrix(beta) saving(beta_", model.name, ".txt) replace", sep="")
    line6<-"mat vcv = e(V)"
    line7<-paste("mat2txt, matrix(vcv) saving(vcv_", model.name, ".txt) replace", sep="")
    line8<-"estat ic"
    line9<-"mat ic = r(S)\'"
    line10<-paste("mat2txt, matrix(ic) saving(ic_", model.name, ".txt) replace", sep="")
    line11<-"predict pred_t, xb" #error!
    line12<-"gen pred = min(max(pred_t,0.01),0.99)"
    line13<-"hl y pred, noroc notable"
    line14<-"mat hl = r(p)"
    line15<-paste("mat2txt, matrix(hl) saving(hl_", model.name, ".txt) replace", sep="")
    line16<-"roctab y pred"
    line17<-"mat roc = r(area)"
    line18<-paste("mat2txt, matrix(roc) saving(roc_", model.name, ".txt) replace", sep="")
    line19<-"clear all"
    line20<-""
    
    
    
    # save the list of commands to an object
    model.19<-vapply(paste("line",c(1:3, 3.1, 3.2, 4:20), sep=""), FUN=get, FUN.VALUE="")
    
    # remember this model's name, formula, and link function
    model.name.vector<-c(model.name.vector, model.name)
    model.formula.vector<-c(model.formula.vector, formula(~x1+x2+I(x1*x2)+I(x1^2)+I(x1^2*x2)))
    model.link.vector<-c(model.link.vector, "regression")
    
    
    
    
    
    ######
    # loglog, x1sqx2
    ######
    model.name<-"loglog_x1sqx2"
    
    line1<-paste("cd \"", normalizePath(getwd()), "\"", sep="")
    line2<-"use simdat_temp.dta"
    line3<-"glm y x1 x2 x1x2 x1sq x1sqx2, family(binomial 1) link(loglog) iter(100)"
    line3.1<-"mat converged = e(converged)"
    line3.2<-paste("mat2txt, matrix(converged) saving(converge_", model.name, ".txt) replace", sep="")    
    line4<-"mat beta = e(b)\'"
    line5<-paste("mat2txt, matrix(beta) saving(beta_", model.name, ".txt) replace", sep="")
    line6<-"mat vcv = e(V)"
    line7<-paste("mat2txt, matrix(vcv) saving(vcv_", model.name, ".txt) replace", sep="")
    line8<-"estat ic"
    line9<-"mat ic = r(S)\'"
    line10<-paste("mat2txt, matrix(ic) saving(ic_", model.name, ".txt) replace", sep="")
    line11<-"predict pred_t, mu"
    line12<-"gen pred = min(max(pred_t,0.01),0.99)"
    line13<-"hl y pred, noroc notable"
    line14<-"mat hl = r(p)"
    line15<-paste("mat2txt, matrix(hl) saving(hl_", model.name, ".txt) replace", sep="")
    line16<-"roctab y pred"
    line17<-"mat roc = r(area)"
    line18<-paste("mat2txt, matrix(roc) saving(roc_", model.name, ".txt) replace", sep="")
    line19<-"clear all"
    line20<-""
    
    
    
    # save the list of commands to an object
    model.20<-vapply(paste("line",c(1:3, 3.1, 3.2, 4:20), sep=""), FUN=get, FUN.VALUE="")
    
    # remember this model's name, formula, and link function
    model.name.vector<-c(model.name.vector, model.name)
    model.formula.vector<-c(model.formula.vector, formula(~x1+x2+I(x1*x2)+I(x1^2)+I(x1^2*x2)))
    model.link.vector<-c(model.link.vector, "loglog")
    
    
    
    
    
    ######
    # logc, x1sqx2
    ######
    model.name<-"logc_x1sqx2"
    
    line1<-paste("cd \"", normalizePath(getwd()), "\"", sep="")
    line2<-"use simdat_temp.dta"
    line3<-"glm y x1 x2 x1x2 x1sq x1sqx2, family(binomial 1) link(logc) iter(100)"
    line3.1<-"mat converged = e(converged)"
    line3.2<-paste("mat2txt, matrix(converged) saving(converge_", model.name, ".txt) replace", sep="")    
    line4<-"mat beta = e(b)\'"
    line5<-paste("mat2txt, matrix(beta) saving(beta_", model.name, ".txt) replace", sep="")
    line6<-"mat vcv = e(V)"
    line7<-paste("mat2txt, matrix(vcv) saving(vcv_", model.name, ".txt) replace", sep="")
    line8<-"estat ic"
    line9<-"mat ic = r(S)\'"
    line10<-paste("mat2txt, matrix(ic) saving(ic_", model.name, ".txt) replace", sep="")
    line11<-"predict pred_t, mu"
    line12<-"gen pred = min(max(pred_t,0.01),0.99)"
    line13<-"hl y pred, noroc notable"
    line14<-"mat hl = r(p)"
    line15<-paste("mat2txt, matrix(hl) saving(hl_", model.name, ".txt) replace", sep="")
    line16<-"roctab y pred"
    line17<-"mat roc = r(area)"
    line18<-paste("mat2txt, matrix(roc) saving(roc_", model.name, ".txt) replace", sep="")
    line19<-"clear all"
    line20<-""
    
    
    # save the list of commands to an object
    model.21<-vapply(paste("line",c(1:3, 3.1, 3.2, 4:20), sep=""), FUN=get, FUN.VALUE="")
    
    # remember this model's name, formula, and link function
    model.name.vector<-c(model.name.vector, model.name)
    model.formula.vector<-c(model.formula.vector, formula(~x1+x2+I(x1*x2)+I(x1^2)+I(x1^2*x2)))
    model.link.vector<-c(model.link.vector, "logc")
    
    
    
    
    ######
    # logit model, x2sqx1
    ######

    model.name<-"logit_x2sqx1"
    
    line1<-paste("cd \"", normalizePath(getwd()), "\"", sep="")
    line2<-"use simdat_temp.dta"
    line3<-"logit y x1 x2 x1x2 x2sq x2sqx1, iter(100)"
    line3.1<-"mat converged = e(converged)"
    line3.2<-paste("mat2txt, matrix(converged) saving(converge_", model.name, ".txt) replace", sep="")    
    line4<-"mat beta = e(b)\'"
    line5<-paste("mat2txt, matrix(beta) saving(beta_", model.name, ".txt) replace", sep="")
    line6<-"mat vcv = e(V)"
    line7<-paste("mat2txt, matrix(vcv) saving(vcv_", model.name, ".txt) replace", sep="")
    line8<-"estat ic"
    line9<-"mat ic = r(S)\'"
    line10<-paste("mat2txt, matrix(ic) saving(ic_", model.name, ".txt) replace", sep="")
    line11<-"predict pred, pr"
    line12<-""
    line13<-"hl y pred, noroc notable"
    line14<-"mat hl = r(p)"
    line15<-paste("mat2txt, matrix(hl) saving(hl_", model.name, ".txt) replace", sep="")
    line16<-"roctab y pred"
    line17<-"mat roc = r(area)"
    line18<-paste("mat2txt, matrix(roc) saving(roc_", model.name, ".txt) replace", sep="")
    line19<-"clear all"
    line20<-""
    
    
    # save the list of commands to an object
    model.22<-vapply(paste("line",c(1:3, 3.1, 3.2, 4:20), sep=""), FUN=get, FUN.VALUE="")
    
    # remember this model's name, formula, and link function
    model.name.vector<-c(model.name.vector, model.name)
    model.formula.vector<-c(model.formula.vector, formula(~x1+x2+I(x1*x2)+I(x2^2)+I(x2^2*x1)))
    model.link.vector<-c(model.link.vector, "logit")
    
    
    
    
    ######
    # cloglog, x2sqx1
    ######
    model.name<-"cloglog_x2sqx1"
    
    line1<-paste("cd \"", normalizePath(getwd()), "\"", sep="")
    line2<-"use simdat_temp.dta"
    line3<-"glm y x1 x2 x1x2 x2sq x2sqx1, family(binomial 1) link(cloglog) iter(100)"
    line3.1<-"mat converged = e(converged)"
    line3.2<-paste("mat2txt, matrix(converged) saving(converge_", model.name, ".txt) replace", sep="")    
    line4<-"mat beta = e(b)\'"
    line5<-paste("mat2txt, matrix(beta) saving(beta_", model.name, ".txt) replace", sep="")
    line6<-"mat vcv = e(V)"
    line7<-paste("mat2txt, matrix(vcv) saving(vcv_", model.name, ".txt) replace", sep="")
    line8<-"estat ic"
    line9<-"mat ic = r(S)\'"
    line10<-paste("mat2txt, matrix(ic) saving(ic_", model.name, ".txt) replace", sep="")
    line11<-"predict pred_t, mu"
    line12<-"gen pred = min(max(pred_t,0.01),0.99)"
    line13<-"hl y pred, noroc notable"
    line14<-"mat hl = r(p)"
    line15<-paste("mat2txt, matrix(hl) saving(hl_", model.name, ".txt) replace", sep="")
    line16<-"roctab y pred"
    line17<-"mat roc = r(area)"
    line18<-paste("mat2txt, matrix(roc) saving(roc_", model.name, ".txt) replace", sep="")
    line19<-"clear all"
    line20<-""

    
    
    # save the list of commands to an object
    model.23<-vapply(paste("line",c(1:3, 3.1, 3.2, 4:20), sep=""), FUN=get, FUN.VALUE="")
    
    # remember this model's name, formula, and link function
    model.name.vector<-c(model.name.vector, model.name)
    model.formula.vector<-c(model.formula.vector, formula(~x1+x2+I(x1*x2)+I(x2^2)+I(x2^2*x1)))
    model.link.vector<-c(model.link.vector, "cloglog")
    
    
    
    
    ######
    # log, x2sqx1
    ######
    model.name<-"log_x2sqx1"
    
    line1<-paste("cd \"", normalizePath(getwd()), "\"", sep="")
    line2<-"use simdat_temp.dta"
    line3<-"glm y x1 x2 x1x2 x2sq x2sqx1, family(binomial 1) link(log) iter(100)"
    line3.1<-"mat converged = e(converged)"
    line3.2<-paste("mat2txt, matrix(converged) saving(converge_", model.name, ".txt) replace", sep="")    
    line4<-"mat beta = e(b)\'"
    line5<-paste("mat2txt, matrix(beta) saving(beta_", model.name, ".txt) replace", sep="")
    line6<-"mat vcv = e(V)"
    line7<-paste("mat2txt, matrix(vcv) saving(vcv_", model.name, ".txt) replace", sep="")
    line8<-"estat ic"
    line9<-"mat ic = r(S)\'"
    line10<-paste("mat2txt, matrix(ic) saving(ic_", model.name, ".txt) replace", sep="")
    line11<-"predict pred_t, mu"
    line12<-"gen pred = min(max(pred_t,0.01),0.99)"
    line13<-"hl y pred, noroc notable"
    line14<-"mat hl = r(p)"
    line15<-paste("mat2txt, matrix(hl) saving(hl_", model.name, ".txt) replace", sep="")
    line16<-"roctab y pred"
    line17<-"mat roc = r(area)"
    line18<-paste("mat2txt, matrix(roc) saving(roc_", model.name, ".txt) replace", sep="")
    line19<-"clear all"
    line20<-""
  
    
    
    # save the list of commands to an object
    model.24<-vapply(paste("line",c(1:3, 3.1, 3.2, 4:20), sep=""), FUN=get, FUN.VALUE="")
    
    # remember this model's name, formula, and link function
    model.name.vector<-c(model.name.vector, model.name)
    model.formula.vector<-c(model.formula.vector, formula(~x1+x2+I(x1*x2)+I(x2^2)+I(x2^2*x1)))
    model.link.vector<-c(model.link.vector, "log")
    
    
    
    
    
    ######
    # probit, x2sqx1
    ######
    model.name<-"probit_x2sqx1"
    
    line1<-paste("cd \"", normalizePath(getwd()), "\"", sep="")
    line2<-"use simdat_temp.dta"
    line3<-"probit y x1 x2 x1x2 x2sq x2sqx1, iter(100)"
    line3.1<-"mat converged = e(converged)"
    line3.2<-paste("mat2txt, matrix(converged) saving(converge_", model.name, ".txt) replace", sep="")    
    line4<-"mat beta = e(b)\'"
    line5<-paste("mat2txt, matrix(beta) saving(beta_", model.name, ".txt) replace", sep="")
    line6<-"mat vcv = e(V)"
    line7<-paste("mat2txt, matrix(vcv) saving(vcv_", model.name, ".txt) replace", sep="")
    line8<-"estat ic"
    line9<-"mat ic = r(S)\'"
    line10<-paste("mat2txt, matrix(ic) saving(ic_", model.name, ".txt) replace", sep="")
    line11<-"predict pred, pr"
    line12<-""
    line13<-"hl y pred, noroc notable"
    line14<-"mat hl = r(p)"
    line15<-paste("mat2txt, matrix(hl) saving(hl_", model.name, ".txt) replace", sep="")
    line16<-"roctab y pred"
    line17<-"mat roc = r(area)"
    line18<-paste("mat2txt, matrix(roc) saving(roc_", model.name, ".txt) replace", sep="")
    line19<-"clear all"
    line20<-""
  
    
    
    # save the list of commands to an object
    model.25<-vapply(paste("line",c(1:3, 3.1, 3.2, 4:20), sep=""), FUN=get, FUN.VALUE="")
    
    # remember this model's name, formula, and link function
    model.name.vector<-c(model.name.vector, model.name)
    model.formula.vector<-c(model.formula.vector, formula(~x1+x2+I(x1*x2)+I(x2^2)+I(x2^2*x1)))
    model.link.vector<-c(model.link.vector, "probit")
    
    
    
    
    
    
    ######
    # regression, x2sqx1
    ######
    model.name<-"regression_x2sqx1"
    
    line1<-paste("cd \"", normalizePath(getwd()), "\"", sep="")
    line2<-"use simdat_temp.dta"
    line3<-"glm y x1 x2 x1x2 x2sq x2sqx1, family(gaussian) iter(100)"
    line3.1<-"mat converged = e(converged)"
    line3.2<-paste("mat2txt, matrix(converged) saving(converge_", model.name, ".txt) replace", sep="")    
    line4<-"mat beta = e(b)\'"
    line5<-paste("mat2txt, matrix(beta) saving(beta_", model.name, ".txt) replace", sep="")
    line6<-"mat vcv = e(V)"
    line7<-paste("mat2txt, matrix(vcv) saving(vcv_", model.name, ".txt) replace", sep="")
    line8<-"estat ic"
    line9<-"mat ic = r(S)\'"
    line10<-paste("mat2txt, matrix(ic) saving(ic_", model.name, ".txt) replace", sep="")
    line11<-"predict pred_t, xb"
    line12<-"gen pred = min(max(pred_t,0.01),0.99)"
    line13<-"hl y pred, noroc notable"
    line14<-"mat hl = r(p)"
    line15<-paste("mat2txt, matrix(hl) saving(hl_", model.name, ".txt) replace", sep="")
    line16<-"roctab y pred"
    line17<-"mat roc = r(area)"
    line18<-paste("mat2txt, matrix(roc) saving(roc_", model.name, ".txt) replace", sep="")
    line19<-"clear all"
    line20<-""
    
    
    
    # save the list of commands to an object
    model.26<-vapply(paste("line",c(1:3, 3.1, 3.2, 4:20), sep=""), FUN=get, FUN.VALUE="")
    
    # remember this model's name, formula, and link function
    model.name.vector<-c(model.name.vector, model.name)
    model.formula.vector<-c(model.formula.vector, formula(~x1+x2+I(x1*x2)+I(x2^2)+I(x2^2*x1)))
    model.link.vector<-c(model.link.vector, "regression")
    
    
    
    ######
    # loglog, x2sqx1
    ######
    model.name<-"loglog_x2sqx1"
    
    line1<-paste("cd \"", normalizePath(getwd()), "\"", sep="")
    line2<-"use simdat_temp.dta"
    line3<-"glm y x1 x2 x1x2 x2sq x2sqx1, family(binomial 1) link(loglog) iter(100)"
    line3.1<-"mat converged = e(converged)"
    line3.2<-paste("mat2txt, matrix(converged) saving(converge_", model.name, ".txt) replace", sep="")    
    line4<-"mat beta = e(b)\'"
    line5<-paste("mat2txt, matrix(beta) saving(beta_", model.name, ".txt) replace", sep="")
    line6<-"mat vcv = e(V)"
    line7<-paste("mat2txt, matrix(vcv) saving(vcv_", model.name, ".txt) replace", sep="")
    line8<-"estat ic"
    line9<-"mat ic = r(S)\'"
    line10<-paste("mat2txt, matrix(ic) saving(ic_", model.name, ".txt) replace", sep="")
    line11<-"predict pred_t, mu"
    line12<-"gen pred = min(max(pred_t,0.01),0.99)"
    line13<-"hl y pred, noroc notable"
    line14<-"mat hl = r(p)"
    line15<-paste("mat2txt, matrix(hl) saving(hl_", model.name, ".txt) replace", sep="")
    line16<-"roctab y pred"
    line17<-"mat roc = r(area)"
    line18<-paste("mat2txt, matrix(roc) saving(roc_", model.name, ".txt) replace", sep="")
    line19<-"clear all"
    line20<-""
    
    
    
    # save the list of commands to an object
    model.27<-vapply(paste("line",c(1:3, 3.1, 3.2, 4:20), sep=""), FUN=get, FUN.VALUE="")
    
    # remember this model's name, formula, and link function
    model.name.vector<-c(model.name.vector, model.name)
    model.formula.vector<-c(model.formula.vector, formula(~x1+x2+I(x1*x2)+I(x2^2)+I(x2^2*x1)))
    model.link.vector<-c(model.link.vector, "loglog")
    
 
    
    ######
    # logc, x2sqx1
    ######
    model.name<-"logc_x2sqx1"
    
    line1<-paste("cd \"", normalizePath(getwd()), "\"", sep="")
    line2<-"use simdat_temp.dta"
    line3<-"glm y x1 x2 x1x2 x2sq x2sqx1, family(binomial 1) link(logc) iter(100)"
    line3.1<-"mat converged = e(converged)"
    line3.2<-paste("mat2txt, matrix(converged) saving(converge_", model.name, ".txt) replace", sep="")    
    line4<-"mat beta = e(b)\'"
    line5<-paste("mat2txt, matrix(beta) saving(beta_", model.name, ".txt) replace", sep="")
    line6<-"mat vcv = e(V)"
    line7<-paste("mat2txt, matrix(vcv) saving(vcv_", model.name, ".txt) replace", sep="")
    line8<-"estat ic"
    line9<-"mat ic = r(S)\'"
    line10<-paste("mat2txt, matrix(ic) saving(ic_", model.name, ".txt) replace", sep="")
    line11<-"predict pred_t, mu"
    line12<-"gen pred = min(max(pred_t,0.01),0.99)"
    line13<-"hl y pred, noroc notable"
    line14<-"mat hl = r(p)"
    line15<-paste("mat2txt, matrix(hl) saving(hl_", model.name, ".txt) replace", sep="")
    line16<-"roctab y pred"
    line17<-"mat roc = r(area)"
    line18<-paste("mat2txt, matrix(roc) saving(roc_", model.name, ".txt) replace", sep="")
    line19<-"clear all"
    line20<-""
    
    
    
    # save the list of commands to an object
    model.28<-vapply(paste("line",c(1:3, 3.1, 3.2, 4:20), sep=""), FUN=get, FUN.VALUE="")
    
    # remember this model's name, formula, and link function
    model.name.vector<-c(model.name.vector, model.name)
    model.formula.vector<-c(model.formula.vector, formula(~x1+x2+I(x1*x2)+I(x2^2)+I(x2^2*x1)))
    model.link.vector<-c(model.link.vector, "logc")
    
    
    
    ######
    # logit model, twosquares
    ######
    model.name<-"logit_twosquares"
    
    line1<-paste("cd \"", normalizePath(getwd()), "\"", sep="")
    line2<-"use simdat_temp.dta"
    line3<-"logit y x1 x2 x1x2 x1sq x2sq x1sqx2 x2sqx1, iter(100)"
    line3.1<-"mat converged = e(converged)"
    line3.2<-paste("mat2txt, matrix(converged) saving(converge_", model.name, ".txt) replace", sep="")    
    line4<-"mat beta = e(b)\'"
    line5<-paste("mat2txt, matrix(beta) saving(beta_", model.name, ".txt) replace", sep="")
    line6<-"mat vcv = e(V)"
    line7<-paste("mat2txt, matrix(vcv) saving(vcv_", model.name, ".txt) replace", sep="")
    line8<-"estat ic"
    line9<-"mat ic = r(S)\'"
    line10<-paste("mat2txt, matrix(ic) saving(ic_", model.name, ".txt) replace", sep="")
    line11<-"predict pred, pr"
    line12<-""
    line13<-"hl y pred, noroc notable"
    line14<-"mat hl = r(p)"
    line15<-paste("mat2txt, matrix(hl) saving(hl_", model.name, ".txt) replace", sep="")
    line16<-"roctab y pred"
    line17<-"mat roc = r(area)"
    line18<-paste("mat2txt, matrix(roc) saving(roc_", model.name, ".txt) replace", sep="")
    line19<-"clear all"
    line20<-""

    
    # save the list of commands to an object
    model.29<-vapply(paste("line",c(1:3, 3.1, 3.2, 4:20), sep=""), FUN=get, FUN.VALUE="")
    
    # remember this model's name, formula, and link function
    model.name.vector<-c(model.name.vector, model.name)
    model.formula.vector<-c(model.formula.vector, formula(~x1+x2+I(x1*x2)+I(x1^2)+I(x2^2)+I(x1^2*x2)+I(x2^2*x1)))
    model.link.vector<-c(model.link.vector, "logit")
    
    
    
    
    ######
    # cloglog, twosquares
    ######
    model.name<-"cloglog_twosquares"
    
    line1<-paste("cd \"", normalizePath(getwd()), "\"", sep="")
    line2<-"use simdat_temp.dta"
    line3<-"glm y x1 x2 x1x2 x1sq x2sq x1sqx2 x2sqx1, family(binomial 1) link(cloglog) iter(100)"
    line3.1<-"mat converged = e(converged)"
    line3.2<-paste("mat2txt, matrix(converged) saving(converge_", model.name, ".txt) replace", sep="")    
    line4<-"mat beta = e(b)\'"
    line5<-paste("mat2txt, matrix(beta) saving(beta_", model.name, ".txt) replace", sep="")
    line6<-"mat vcv = e(V)"
    line7<-paste("mat2txt, matrix(vcv) saving(vcv_", model.name, ".txt) replace", sep="")
    line8<-"estat ic"
    line9<-"mat ic = r(S)\'"
    line10<-paste("mat2txt, matrix(ic) saving(ic_", model.name, ".txt) replace", sep="")
    line11<-"predict pred_t, mu"
    line12<-"gen pred = min(max(pred_t,0.01),0.99)"
    line13<-"hl y pred, noroc notable"
    line14<-"mat hl = r(p)"
    line15<-paste("mat2txt, matrix(hl) saving(hl_", model.name, ".txt) replace", sep="")
    line16<-"roctab y pred"
    line17<-"mat roc = r(area)"
    line18<-paste("mat2txt, matrix(roc) saving(roc_", model.name, ".txt) replace", sep="")
    line19<-"clear all"
    line20<-""

    
    
    # save the list of commands to an object
    model.30<-vapply(paste("line",c(1:3, 3.1, 3.2, 4:20), sep=""), FUN=get, FUN.VALUE="")
    
    # remember this model's name, formula, and link function
    model.name.vector<-c(model.name.vector, model.name)
    model.formula.vector<-c(model.formula.vector, formula(~x1+x2+I(x1*x2)+I(x1^2)+I(x2^2)+I(x1^2*x2)+I(x2^2*x1)))
    model.link.vector<-c(model.link.vector, "cloglog")
    
    
    
    
    ######
    # log, twosquares
    ######
    model.name<-"log_twosquares"
    
    line1<-paste("cd \"", normalizePath(getwd()), "\"", sep="")
    line2<-"use simdat_temp.dta"
    line3<-"glm y x1 x2 x1x2 x1sq x2sq x1sqx2 x2sqx1, family(binomial 1) link(log) iter(100)"
    line3.1<-"mat converged = e(converged)"
    line3.2<-paste("mat2txt, matrix(converged) saving(converge_", model.name, ".txt) replace", sep="")    
    line4<-"mat beta = e(b)\'"
    line5<-paste("mat2txt, matrix(beta) saving(beta_", model.name, ".txt) replace", sep="")
    line6<-"mat vcv = e(V)"
    line7<-paste("mat2txt, matrix(vcv) saving(vcv_", model.name, ".txt) replace", sep="")
    line8<-"estat ic"
    line9<-"mat ic = r(S)\'"
    line10<-paste("mat2txt, matrix(ic) saving(ic_", model.name, ".txt) replace", sep="")
    line11<-"predict pred_t, mu"
    line12<-"gen pred = min(max(pred_t,0.01),0.99)"
    line13<-"hl y pred, noroc notable"
    line14<-"mat hl = r(p)"
    line15<-paste("mat2txt, matrix(hl) saving(hl_", model.name, ".txt) replace", sep="")
    line16<-"roctab y pred"
    line17<-"mat roc = r(area)"
    line18<-paste("mat2txt, matrix(roc) saving(roc_", model.name, ".txt) replace", sep="")
    line19<-"clear all"
    line20<-""
   
    
    
    # save the list of commands to an object
    model.31<-vapply(paste("line",c(1:3, 3.1, 3.2, 4:20), sep=""), FUN=get, FUN.VALUE="")
    
    # remember this model's name, formula, and link function
    model.name.vector<-c(model.name.vector, model.name)
    model.formula.vector<-c(model.formula.vector, formula(~x1+x2+I(x1*x2)+I(x1^2)+I(x2^2)+I(x1^2*x2)+I(x2^2*x1)))
    model.link.vector<-c(model.link.vector, "log")
    
    
    
    
    
    ######
    # probit, twosquares
    ######
    model.name<-"probit_twosquares"
    
    line1<-paste("cd \"", normalizePath(getwd()), "\"", sep="")
    line2<-"use simdat_temp.dta"
    line3<-"probit y x1 x2 x1x2 x1sq x2sq x1sqx2 x2sqx1, iter(100)"
    line3.1<-"mat converged = e(converged)"
    line3.2<-paste("mat2txt, matrix(converged) saving(converge_", model.name, ".txt) replace", sep="")    
    line4<-"mat beta = e(b)\'"
    line5<-paste("mat2txt, matrix(beta) saving(beta_", model.name, ".txt) replace", sep="")
    line6<-"mat vcv = e(V)"
    line7<-paste("mat2txt, matrix(vcv) saving(vcv_", model.name, ".txt) replace", sep="")
    line8<-"estat ic"
    line9<-"mat ic = r(S)\'"
    line10<-paste("mat2txt, matrix(ic) saving(ic_", model.name, ".txt) replace", sep="")
    line11<-"predict pred, pr"
    line12<-""
    line13<-"hl y pred, noroc notable"
    line14<-"mat hl = r(p)"
    line15<-paste("mat2txt, matrix(hl) saving(hl_", model.name, ".txt) replace", sep="")
    line16<-"roctab y pred"
    line17<-"mat roc = r(area)"
    line18<-paste("mat2txt, matrix(roc) saving(roc_", model.name, ".txt) replace", sep="")
    line19<-"clear all"
    line20<-""
  
    
    
    # save the list of commands to an object
    model.32<-vapply(paste("line",c(1:3, 3.1, 3.2, 4:20), sep=""), FUN=get, FUN.VALUE="")
    
    # remember this model's name, formula, and link function
    model.name.vector<-c(model.name.vector, model.name)
    model.formula.vector<-c(model.formula.vector, formula(~x1+x2+I(x1*x2)+I(x1^2)+I(x2^2)+I(x1^2*x2)+I(x2^2*x1)))
    model.link.vector<-c(model.link.vector, "probit")
    
    
    
    
    
    
    ######
    # regression, twosquares
    ######
    model.name<-"regression_twosquares"
    
    line1<-paste("cd \"", normalizePath(getwd()), "\"", sep="")
    line2<-"use simdat_temp.dta"
    line3<-"glm y x1 x2 x1x2 x1sq x2sq x1sqx2 x2sqx1, family(gaussian) iter(100)"
    line3.1<-"mat converged = e(converged)"
    line3.2<-paste("mat2txt, matrix(converged) saving(converge_", model.name, ".txt) replace", sep="")    
    line4<-"mat beta = e(b)\'"
    line5<-paste("mat2txt, matrix(beta) saving(beta_", model.name, ".txt) replace", sep="")
    line6<-"mat vcv = e(V)"
    line7<-paste("mat2txt, matrix(vcv) saving(vcv_", model.name, ".txt) replace", sep="")
    line8<-"estat ic"
    line9<-"mat ic = r(S)\'"
    line10<-paste("mat2txt, matrix(ic) saving(ic_", model.name, ".txt) replace", sep="")
    line11<-"predict pred_t, xb"
    line12<-"gen pred = min(max(pred_t,0.01),0.99)"
    line13<-"hl y pred, noroc notable"
    line14<-"mat hl = r(p)"
    line15<-paste("mat2txt, matrix(hl) saving(hl_", model.name, ".txt) replace", sep="")
    line16<-"roctab y pred"
    line17<-"mat roc = r(area)"
    line18<-paste("mat2txt, matrix(roc) saving(roc_", model.name, ".txt) replace", sep="")
    line19<-"clear all"
    line20<-""
  
    
    
    # save the list of commands to an object
    model.33<-vapply(paste("line",c(1:3, 3.1, 3.2, 4:20), sep=""), FUN=get, FUN.VALUE="")
    
    # remember this model's name, formula, and link function
    model.name.vector<-c(model.name.vector, model.name)
    model.formula.vector<-c(model.formula.vector, formula(~x1+x2+I(x1*x2)+I(x1^2)+I(x2^2)+I(x1^2*x2)+I(x2^2*x1)))
    model.link.vector<-c(model.link.vector, "regression")
    
    
    
    
    
    ######
    # loglog, twosquares
    ######
    model.name<-"loglog_twosquares"
    
    line1<-paste("cd \"", normalizePath(getwd()), "\"", sep="")
    line2<-"use simdat_temp.dta"
    line3<-"glm y x1 x2 x1x2 x1sq x2sq x1sqx2 x2sqx1, family(binomial 1) link(loglog) iter(100)"
    line3.1<-"mat converged = e(converged)"
    line3.2<-paste("mat2txt, matrix(converged) saving(converge_", model.name, ".txt) replace", sep="")    
    line4<-"mat beta = e(b)\'"
    line5<-paste("mat2txt, matrix(beta) saving(beta_", model.name, ".txt) replace", sep="")
    line6<-"mat vcv = e(V)"
    line7<-paste("mat2txt, matrix(vcv) saving(vcv_", model.name, ".txt) replace", sep="")
    line8<-"estat ic"
    line9<-"mat ic = r(S)\'"
    line10<-paste("mat2txt, matrix(ic) saving(ic_", model.name, ".txt) replace", sep="")
    line11<-"predict pred_t, mu"
    line12<-"gen pred = min(max(pred_t,0.01),0.99)"
    line13<-"hl y pred, noroc notable"
    line14<-"mat hl = r(p)"
    line15<-paste("mat2txt, matrix(hl) saving(hl_", model.name, ".txt) replace", sep="")
    line16<-"roctab y pred"
    line17<-"mat roc = r(area)"
    line18<-paste("mat2txt, matrix(roc) saving(roc_", model.name, ".txt) replace", sep="")
    line19<-"clear all"
    line20<-""
    
    
    
    # save the list of commands to an object
    model.34<-vapply(paste("line",c(1:3, 3.1, 3.2, 4:20), sep=""), FUN=get, FUN.VALUE="")
    
    # remember this model's name, formula, and link function
    model.name.vector<-c(model.name.vector, model.name)
    model.formula.vector<-c(model.formula.vector, formula(~x1+x2+I(x1*x2)+I(x1^2)+I(x2^2)+I(x1^2*x2)+I(x2^2*x1)))
    model.link.vector<-c(model.link.vector, "loglog")
    
    
    
    
    ######
    # logc, twosquares
    ######
    model.name<-"logc_twosquares"
    
    line1<-paste("cd \"", normalizePath(getwd()), "\"", sep="")
    line2<-"use simdat_temp.dta"
    line3<-"glm y x1 x2 x1x2 x1sq x2sq x1sqx2 x2sqx1, family(binomial 1) link(logc) iter(100)"
    line3.1<-"mat converged = e(converged)"
    line3.2<-paste("mat2txt, matrix(converged) saving(converge_", model.name, ".txt) replace", sep="")    
    line4<-"mat beta = e(b)\'"
    line5<-paste("mat2txt, matrix(beta) saving(beta_", model.name, ".txt) replace", sep="")
    line6<-"mat vcv = e(V)"
    line7<-paste("mat2txt, matrix(vcv) saving(vcv_", model.name, ".txt) replace", sep="")
    line8<-"estat ic"
    line9<-"mat ic = r(S)\'"
    line10<-paste("mat2txt, matrix(ic) saving(ic_", model.name, ".txt) replace", sep="")
    line11<-"predict pred_t, mu"
    line12<-"gen pred = min(max(pred_t,0.01),0.99)"
    line13<-"hl y pred, noroc notable"
    line14<-"mat hl = r(p)"
    line15<-paste("mat2txt, matrix(hl) saving(hl_", model.name, ".txt) replace", sep="")
    line16<-"roctab y pred"
    line17<-"mat roc = r(area)"
    line18<-paste("mat2txt, matrix(roc) saving(roc_", model.name, ".txt) replace", sep="")
    line19<-"clear all"
    line20<-""
    
    
    
    # save the list of commands to an object
    model.35<-vapply(paste("line",c(1:3, 3.1, 3.2, 4:20), sep=""), FUN=get, FUN.VALUE="")
    
    # remember this model's name, formula, and link function
    model.name.vector<-c(model.name.vector, model.name)
    model.formula.vector<-c(model.formula.vector, formula(~x1+x2+I(x1*x2)+I(x1^2)+I(x2^2)+I(x1^2*x2)+I(x2^2*x1)))
    model.link.vector<-c(model.link.vector, "logc")
    
    
    
    
    ######
    # logit model, allsquares
    ######
    model.name<-"logit_allsquares"
    
    line1<-paste("cd \"", normalizePath(getwd()), "\"", sep="")
    line2<-"use simdat_temp.dta"
    line3<-"logit y x1 x2 x1x2 x1sq x2sq x1sqx2 x2sqx1 x1sqx2sq, iter(100)"
    line3.1<-"mat converged = e(converged)"
    line3.2<-paste("mat2txt, matrix(converged) saving(converge_", model.name, ".txt) replace", sep="")    
    line4<-"mat beta = e(b)\'"
    line5<-paste("mat2txt, matrix(beta) saving(beta_", model.name, ".txt) replace", sep="")
    line6<-"mat vcv = e(V)"
    line7<-paste("mat2txt, matrix(vcv) saving(vcv_", model.name, ".txt) replace", sep="")
    line8<-"estat ic"
    line9<-"mat ic = r(S)\'"
    line10<-paste("mat2txt, matrix(ic) saving(ic_", model.name, ".txt) replace", sep="")
    line11<-"predict pred, pr"
    line12<-""
    line13<-"hl y pred, noroc notable"
    line14<-"mat hl = r(p)"
    line15<-paste("mat2txt, matrix(hl) saving(hl_", model.name, ".txt) replace", sep="")
    line16<-"roctab y pred"
    line17<-"mat roc = r(area)"
    line18<-paste("mat2txt, matrix(roc) saving(roc_", model.name, ".txt) replace", sep="")
    line19<-"clear all"
    line20<-""

    
    # save the list of commands to an object
    model.36<-vapply(paste("line",c(1:3, 3.1, 3.2, 4:20), sep=""), FUN=get, FUN.VALUE="")
    
    # remember this model's name, formula, and link function
    model.name.vector<-c(model.name.vector, model.name)
    model.formula.vector<-c(model.formula.vector, formula(~x1+x2+I(x1*x2)+I(x1^2)+I(x2^2)+I(x1^2*x2)+I(x2^2*x1)+I(x1^2*x2^2)))
    model.link.vector<-c(model.link.vector, "logit")
    
    
    
    
    ######
    # cloglog, allsquares
    ######
    model.name<-"cloglog_allsquares"
    
    line1<-paste("cd \"", normalizePath(getwd()), "\"", sep="")
    line2<-"use simdat_temp.dta"
    line3<-"glm y x1 x2 x1x2 x1sq x2sq x1sqx2 x2sqx1 x1sqx2sq, family(binomial 1) link(cloglog) iter(100)"
    line3.1<-"mat converged = e(converged)"
    line3.2<-paste("mat2txt, matrix(converged) saving(converge_", model.name, ".txt) replace", sep="")    
    line4<-"mat beta = e(b)\'"
    line5<-paste("mat2txt, matrix(beta) saving(beta_", model.name, ".txt) replace", sep="")
    line6<-"mat vcv = e(V)"
    line7<-paste("mat2txt, matrix(vcv) saving(vcv_", model.name, ".txt) replace", sep="")
    line8<-"estat ic"
    line9<-"mat ic = r(S)\'"
    line10<-paste("mat2txt, matrix(ic) saving(ic_", model.name, ".txt) replace", sep="")
    line11<-"predict pred_t, mu"
    line12<-"gen pred = min(max(pred_t,0.01),0.99)"
    line13<-"hl y pred, noroc notable"
    line14<-"mat hl = r(p)"
    line15<-paste("mat2txt, matrix(hl) saving(hl_", model.name, ".txt) replace", sep="")
    line16<-"roctab y pred"
    line17<-"mat roc = r(area)"
    line18<-paste("mat2txt, matrix(roc) saving(roc_", model.name, ".txt) replace", sep="")
    line19<-"clear all"
    line20<-""
   
    
    
    # save the list of commands to an object
    model.37<-vapply(paste("line",c(1:3, 3.1, 3.2, 4:20), sep=""), FUN=get, FUN.VALUE="")
    
    # remember this model's name, formula, and link function
    model.name.vector<-c(model.name.vector, model.name)
    model.formula.vector<-c(model.formula.vector, formula(~x1+x2+I(x1*x2)+I(x1^2)+I(x2^2)+I(x1^2*x2)+I(x2^2*x1)+I(x1^2*x2^2)))
    model.link.vector<-c(model.link.vector, "cloglog")
    
    
    
    
    ######
    # log, allsquares
    ######
    model.name<-"log_allsquares"
    
    line1<-paste("cd \"", normalizePath(getwd()), "\"", sep="")
    line2<-"use simdat_temp.dta"
    line3<-"glm y x1 x2 x1x2 x1sq x2sq x1sqx2 x2sqx1 x1sqx2sq, family(binomial 1) link(log) iter(100)"
    line3.1<-"mat converged = e(converged)"
    line3.2<-paste("mat2txt, matrix(converged) saving(converge_", model.name, ".txt) replace", sep="")    
    line4<-"mat beta = e(b)\'"
    line5<-paste("mat2txt, matrix(beta) saving(beta_", model.name, ".txt) replace", sep="")
    line6<-"mat vcv = e(V)"
    line7<-paste("mat2txt, matrix(vcv) saving(vcv_", model.name, ".txt) replace", sep="")
    line8<-"estat ic"
    line9<-"mat ic = r(S)\'"
    line10<-paste("mat2txt, matrix(ic) saving(ic_", model.name, ".txt) replace", sep="")
    line11<-"predict pred_t, mu"
    line12<-"gen pred = min(max(pred_t,0.01),0.99)"
    line13<-"hl y pred, noroc notable"
    line14<-"mat hl = r(p)"
    line15<-paste("mat2txt, matrix(hl) saving(hl_", model.name, ".txt) replace", sep="")
    line16<-"roctab y pred"
    line17<-"mat roc = r(area)"
    line18<-paste("mat2txt, matrix(roc) saving(roc_", model.name, ".txt) replace", sep="")
    line19<-"clear all"
    line20<-""
  
    
    
    # save the list of commands to an object
    model.38<-vapply(paste("line",c(1:3, 3.1, 3.2, 4:20), sep=""), FUN=get, FUN.VALUE="")
    
    # remember this model's name, formula, and link function
    model.name.vector<-c(model.name.vector, model.name)
    model.formula.vector<-c(model.formula.vector, formula(~x1+x2+I(x1*x2)+I(x1^2)+I(x2^2)+I(x1^2*x2)+I(x2^2*x1)+I(x1^2*x2^2)))
    model.link.vector<-c(model.link.vector, "log")
    
    
    
    
    
    ######
    # probit, allsquares
    ######
    model.name<-"probit_allsquares"
    
    line1<-paste("cd \"", normalizePath(getwd()), "\"", sep="")
    line2<-"use simdat_temp.dta"
    line3<-"probit y x1 x2 x1x2 x1sq x2sq x1sqx2 x2sqx1 x1sqx2sq, iter(100)"
    line3.1<-"mat converged = e(converged)"
    line3.2<-paste("mat2txt, matrix(converged) saving(converge_", model.name, ".txt) replace", sep="")    
    line4<-"mat beta = e(b)\'"
    line5<-paste("mat2txt, matrix(beta) saving(beta_", model.name, ".txt) replace", sep="")
    line6<-"mat vcv = e(V)"
    line7<-paste("mat2txt, matrix(vcv) saving(vcv_", model.name, ".txt) replace", sep="")
    line8<-"estat ic"
    line9<-"mat ic = r(S)\'"
    line10<-paste("mat2txt, matrix(ic) saving(ic_", model.name, ".txt) replace", sep="")
    line11<-"predict pred, pr"
    line12<-""
    line13<-"hl y pred, noroc notable"
    line14<-"mat hl = r(p)"
    line15<-paste("mat2txt, matrix(hl) saving(hl_", model.name, ".txt) replace", sep="")
    line16<-"roctab y pred"
    line17<-"mat roc = r(area)"
    line18<-paste("mat2txt, matrix(roc) saving(roc_", model.name, ".txt) replace", sep="")
    line19<-"clear all"
    line20<-""
    
    
    
    # save the list of commands to an object
    model.39<-vapply(paste("line",c(1:3, 3.1, 3.2, 4:20), sep=""), FUN=get, FUN.VALUE="")
    
    # remember this model's name, formula, and link function
    model.name.vector<-c(model.name.vector, model.name)
    model.formula.vector<-c(model.formula.vector, formula(~x1+x2+I(x1*x2)+I(x1^2)+I(x2^2)+I(x1^2*x2)+I(x2^2*x1)+I(x1^2*x2^2)))
    model.link.vector<-c(model.link.vector, "probit")
    
    
    
    
    
    
    ######
    # regression, allsquares
    ######
    model.name<-"regression_allsquares"
    
    line1<-paste("cd \"", normalizePath(getwd()), "\"", sep="")
    line2<-"use simdat_temp.dta"
    line3<-"glm y x1 x2 x1x2 x1sq x2sq x1sqx2 x2sqx1 x1sqx2sq, family(gaussian) iter(100)"
    line3.1<-"mat converged = e(converged)"
    line3.2<-paste("mat2txt, matrix(converged) saving(converge_", model.name, ".txt) replace", sep="")    
    line4<-"mat beta = e(b)\'"
    line5<-paste("mat2txt, matrix(beta) saving(beta_", model.name, ".txt) replace", sep="")
    line6<-"mat vcv = e(V)"
    line7<-paste("mat2txt, matrix(vcv) saving(vcv_", model.name, ".txt) replace", sep="")
    line8<-"estat ic"
    line9<-"mat ic = r(S)\'"
    line10<-paste("mat2txt, matrix(ic) saving(ic_", model.name, ".txt) replace", sep="")
    line11<-"predict pred_t, xb"
    line12<-"gen pred = min(max(pred_t,0.01),0.99)"
    line13<-"hl y pred, noroc notable"
    line14<-"mat hl = r(p)"
    line15<-paste("mat2txt, matrix(hl) saving(hl_", model.name, ".txt) replace", sep="")
    line16<-"roctab y pred"
    line17<-"mat roc = r(area)"
    line18<-paste("mat2txt, matrix(roc) saving(roc_", model.name, ".txt) replace", sep="")
    line19<-"clear all"
    line20<-""
  
    
    
    # save the list of commands to an object
    model.40<-vapply(paste("line",c(1:3, 3.1, 3.2, 4:20), sep=""), FUN=get, FUN.VALUE="")
    
    # remember this model's name, formula, and link function
    model.name.vector<-c(model.name.vector, model.name)
    model.formula.vector<-c(model.formula.vector, formula(~x1+x2+I(x1*x2)+I(x1^2)+I(x2^2)+I(x1^2*x2)+I(x2^2*x1)+I(x1^2*x2^2)))
    model.link.vector<-c(model.link.vector, "regression")
    
    
    
    ######
    # loglog, allsquares
    ######
    model.name<-"loglog_allsquares"
    
    line1<-paste("cd \"", normalizePath(getwd()), "\"", sep="")
    line2<-"use simdat_temp.dta"
    line3<-"glm y x1 x2 x1x2 x1sq x2sq x1sqx2 x2sqx1 x1sqx2sq, family(binomial 1) link(loglog) iter(100)"
    line3.1<-"mat converged = e(converged)"
    line3.2<-paste("mat2txt, matrix(converged) saving(converge_", model.name, ".txt) replace", sep="")    
    line4<-"mat beta = e(b)\'"
    line5<-paste("mat2txt, matrix(beta) saving(beta_", model.name, ".txt) replace", sep="")
    line6<-"mat vcv = e(V)"
    line7<-paste("mat2txt, matrix(vcv) saving(vcv_", model.name, ".txt) replace", sep="")
    line8<-"estat ic"
    line9<-"mat ic = r(S)\'"
    line10<-paste("mat2txt, matrix(ic) saving(ic_", model.name, ".txt) replace", sep="")
    line11<-"predict pred_t, mu"
    line12<-"gen pred = min(max(pred_t,0.01),0.99)"
    line13<-"hl y pred, noroc notable"
    line14<-"mat hl = r(p)"
    line15<-paste("mat2txt, matrix(hl) saving(hl_", model.name, ".txt) replace", sep="")
    line16<-"roctab y pred"
    line17<-"mat roc = r(area)"
    line18<-paste("mat2txt, matrix(roc) saving(roc_", model.name, ".txt) replace", sep="")
    line19<-"clear all"
    line20<-""
    
    
    
    # save the list of commands to an object
    model.41<-vapply(paste("line",c(1:3, 3.1, 3.2, 4:20), sep=""), FUN=get, FUN.VALUE="")
    
    # remember this model's name, formula, and link function
    model.name.vector<-c(model.name.vector, model.name)
    model.formula.vector<-c(model.formula.vector, formula(~x1+x2+I(x1*x2)+I(x1^2)+I(x2^2)+I(x1^2*x2)+I(x2^2*x1)+I(x1^2*x2^2)))
    model.link.vector<-c(model.link.vector, "loglog")
    
    
    
    ######
    # logc, allsquares
    ######
    model.name<-"logc_allsquares"
    
    line1<-paste("cd \"", normalizePath(getwd()), "\"", sep="")
    line2<-"use simdat_temp.dta"
    line3<-"glm y x1 x2 x1x2 x1sq x2sq x1sqx2 x2sqx1 x1sqx2sq, family(binomial 1) link(logc) iter(100)"
    line3.1<-"mat converged = e(converged)"
    line3.2<-paste("mat2txt, matrix(converged) saving(converge_", model.name, ".txt) replace", sep="")    
    line4<-"mat beta = e(b)\'"
    line5<-paste("mat2txt, matrix(beta) saving(beta_", model.name, ".txt) replace", sep="")
    line6<-"mat vcv = e(V)"
    line7<-paste("mat2txt, matrix(vcv) saving(vcv_", model.name, ".txt) replace", sep="")
    line8<-"estat ic"
    line9<-"mat ic = r(S)\'"
    line10<-paste("mat2txt, matrix(ic) saving(ic_", model.name, ".txt) replace", sep="")
    line11<-"predict pred_t, mu"
    line12<-"gen pred = min(max(pred_t,0.01),0.99)"
    line13<-"hl y pred, noroc notable"
    line14<-"mat hl = r(p)"
    line15<-paste("mat2txt, matrix(hl) saving(hl_", model.name, ".txt) replace", sep="")
    line16<-"roctab y pred"
    line17<-"mat roc = r(area)"
    line18<-paste("mat2txt, matrix(roc) saving(roc_", model.name, ".txt) replace", sep="")
    line19<-"clear all"
    line20<-""
    
    
    
    # save the list of commands to an object
    model.42<-vapply(paste("line",c(1:3, 3.1, 3.2, 4:20), sep=""), FUN=get, FUN.VALUE="")
    
    # remember this model's name, formula, and link function
    model.name.vector<-c(model.name.vector, model.name)
    model.formula.vector<-c(model.formula.vector, formula(~x1+x2+I(x1*x2)+I(x1^2)+I(x2^2)+I(x1^2*x2)+I(x2^2*x1)+I(x1^2*x2^2)))
    model.link.vector<-c(model.link.vector, "logc")
    
    
    
    
    
    
    
    
    
    
    
    # before executing models, save the list of original (non-selection-criteria) models
    model.name.vector.o<-model.name.vector
    
    # write all the Stata commands to a do file
    fileConn<-file("models.do")
    writeLines(as.vector(vapply(paste("model.",1:length(model.name.vector), sep=""), FUN=get, FUN.VALUE=rep("",22))),fileConn)
    close(fileConn)
    
    # execute that do file
    system(paste("\"C:\\Program Files (x86)\\Stata11\\StataSE-64.exe\" /e do \"",normalizePath(getwd()), "\"\\models.do\"", sep=""))
    
    # recover the beta, vcv, information criteria, convergence status, and hosmer-lemeshow/ROC fit statistics for that model
    for(j in 1:length(model.name.vector)){
      
      temp<-as.matrix(read.table(file=paste("beta_", model.name.vector[j],".txt", sep="")))
      temp<-as.matrix(temp[c(dim(temp)[1],1:(dim(temp)[1]-1)),])
      assign(paste(model.name.vector[j],".beta", sep=""), value=temp)
      
      temp<-as.matrix(read.table(file=paste("vcv_", model.name.vector[j],".txt", sep="")))
      temp<-temp[c(dim(temp)[1],1:(dim(temp)[1]-1)),]
      temp<-temp[,c(dim(temp)[1],1:(dim(temp)[1]-1))]     
      assign(paste(model.name.vector[j],".vcv", sep=""), value=temp)
      
      assign(paste(model.name.vector[j],".ic", sep=""), value=as.matrix(read.table(file=paste("ic_", model.name.vector[j],".txt", sep=""))))
      
      assign(paste(model.name.vector[j],".converge", sep=""), value=as.matrix(read.table(file=paste("converge_", model.name.vector[j],".txt", sep=""))))
      
      assign(paste(model.name.vector[j],".hl", sep=""), value=as.matrix(read.table(file=paste("hl_", model.name.vector[j],".txt", sep=""))))
      assign(paste(model.name.vector[j],".roc", sep=""), value=as.matrix(read.table(file=paste("roc_", model.name.vector[j],".txt", sep=""))))
      
    }
    
    
    
    ###
    # choose the optimal estimators via IC/HL statistics
    ###

    # store the model vectors in a temporary location for augmentation
    model.name.vector.t<-model.name.vector     
    model.formula.vector.t<-model.formula.vector
    model.link.vector.t<-model.link.vector
 
  
    
    # all models, ROC selector
    ic.vector<-c()
    for(j in 1:length(model.name.vector)){
      
      if(1==get(paste(model.name.vector[j], ".converge", sep=""))){
        ic.vector<-c(ic.vector, get(paste(model.name.vector[j], ".roc", sep="")))
      }else{ic.vector<-c(ic.vector, -Inf)}
      
    }
    ic.vector<-as.numeric(ic.vector)+(0.00001*runif(length(ic.vector)))  # break ties
    model.name.vector.t<-c(model.name.vector.t, model.name.vector[which(ic.vector==max(ic.vector))])
    model.formula.vector.t<-c(model.formula.vector.t, model.formula.vector[which(ic.vector==max(ic.vector))])
    model.link.vector.t<-c(model.link.vector.t, model.link.vector[which(ic.vector==max(ic.vector))])
    
    
    # all models, HL selector
    ic.vector<-c()
    for(j in 1:length(model.name.vector)){
 
      if(1==get(paste(model.name.vector[j], ".converge", sep=""))){
        cand<-get(paste(model.name.vector[j], ".hl", sep=""))
        if(suppressWarnings(is.na(as.numeric(cand))==FALSE)){
          ic.vector<-c(ic.vector, cand)
        }else{ic.vector<-c(ic.vector, Inf)}
      }else{ic.vector<-c(ic.vector, Inf)}
      
    }
    ic.vector<-as.numeric(ic.vector)+(0.00001*runif(length(ic.vector)))  # break ties
    model.name.vector.t<-c(model.name.vector.t, model.name.vector[which(ic.vector==min(ic.vector))])
    model.formula.vector.t<-c(model.formula.vector.t, model.formula.vector[which(ic.vector==min(ic.vector))])
    model.link.vector.t<-c(model.link.vector.t, model.link.vector[which(ic.vector==min(ic.vector))])
    
    
    # all models, bic selector 
    ic.vector<-c()
    for(j in 1:length(model.name.vector)){
      
      if(1==get(paste(model.name.vector[j], ".converge", sep=""))){
        ic.vector<-c(ic.vector, get(paste(model.name.vector[j], ".ic", sep=""))[6])
      }else{ic.vector<-c(ic.vector, Inf)}
      
    }
	ic.vector<-as.numeric(ic.vector)+(0.00001*runif(length(ic.vector)))  # break ties
    model.name.vector.t<-c(model.name.vector.t, model.name.vector[which(ic.vector==min(ic.vector))])
    model.formula.vector.t<-c(model.formula.vector.t, model.formula.vector[which(ic.vector==min(ic.vector))])
    model.link.vector.t<-c(model.link.vector.t, model.link.vector[which(ic.vector==min(ic.vector))])
    
    # all models, aic selector
    ic.vector<-c()
    for(j in 1:length(model.name.vector)){
      
      if(1==get(paste(model.name.vector[j], ".converge", sep=""))){
        ic.vector<-c(ic.vector, get(paste(model.name.vector[j], ".ic", sep=""))[5])
      }else{ic.vector<-c(ic.vector, Inf)}
      
    }
	ic.vector<-as.numeric(ic.vector)+(0.00001*runif(length(ic.vector)))  # break ties
    model.name.vector.t<-c(model.name.vector.t, model.name.vector[which(ic.vector==min(ic.vector))])
    model.formula.vector.t<-c(model.formula.vector.t, model.formula.vector[which(ic.vector==min(ic.vector))])
    model.link.vector.t<-c(model.link.vector.t, model.link.vector[which(ic.vector==min(ic.vector))])
 
    
    

    # simple models, ROC selector
    ic.vector<-c()
    for(j in 1:SM){
      
      if(1==get(paste(model.name.vector[j], ".converge", sep=""))){
        ic.vector<-c(ic.vector, get(paste(model.name.vector[j], ".roc", sep="")))
      }else{ic.vector<-c(ic.vector, -Inf)}
      
    }
    ic.vector<-as.numeric(ic.vector)+(0.00001*runif(length(ic.vector)))  # break ties
    model.name.vector.t<-c(model.name.vector.t, model.name.vector[which(ic.vector==max(ic.vector))])
    model.formula.vector.t<-c(model.formula.vector.t, model.formula.vector[which(ic.vector==max(ic.vector))])
    model.link.vector.t<-c(model.link.vector.t, model.link.vector[which(ic.vector==max(ic.vector))])
    
    
    # simple models, HL selector
    ic.vector<-c()
    for(j in 1:SM){
      
      if(1==get(paste(model.name.vector[j], ".converge", sep=""))){
        cand<-get(paste(model.name.vector[j], ".hl", sep=""))
        if(suppressWarnings(is.na(as.numeric(cand))==FALSE)){
          ic.vector<-c(ic.vector, cand)
        }else{ic.vector<-c(ic.vector, Inf)}
      }else{ic.vector<-c(ic.vector, Inf)}
      
    }
    ic.vector<-as.numeric(ic.vector)+(0.00001*runif(length(ic.vector)))  # break ties
    model.name.vector.t<-c(model.name.vector.t, model.name.vector[which(ic.vector==min(ic.vector))])
    model.formula.vector.t<-c(model.formula.vector.t, model.formula.vector[which(ic.vector==min(ic.vector))])
    model.link.vector.t<-c(model.link.vector.t, model.link.vector[which(ic.vector==min(ic.vector))])
    
    
    # simple models, bic selector 
    ic.vector<-c()
    for(j in 1:SM){
      
      if(1==get(paste(model.name.vector[j], ".converge", sep=""))){
        ic.vector<-c(ic.vector, get(paste(model.name.vector[j], ".ic", sep=""))[6])
      }else{ic.vector<-c(ic.vector, Inf)}
      
    }
    ic.vector<-as.numeric(ic.vector)+(0.00001*runif(length(ic.vector)))  # break ties
    model.name.vector.t<-c(model.name.vector.t, model.name.vector[which(ic.vector==min(ic.vector))])
    model.formula.vector.t<-c(model.formula.vector.t, model.formula.vector[which(ic.vector==min(ic.vector))])
    model.link.vector.t<-c(model.link.vector.t, model.link.vector[which(ic.vector==min(ic.vector))])
    
    # simple models, aic selector
    ic.vector<-c()
    for(j in 1:SM){
      
      if(1==get(paste(model.name.vector[j], ".converge", sep=""))){
        ic.vector<-c(ic.vector, get(paste(model.name.vector[j], ".ic", sep=""))[5])
      }else{ic.vector<-c(ic.vector, Inf)}
      
    }
    ic.vector<-as.numeric(ic.vector)+(0.00001*runif(length(ic.vector)))  # break ties
    model.name.vector.t<-c(model.name.vector.t, model.name.vector[which(ic.vector==min(ic.vector))])
    model.formula.vector.t<-c(model.formula.vector.t, model.formula.vector[which(ic.vector==min(ic.vector))])
    model.link.vector.t<-c(model.link.vector.t, model.link.vector[which(ic.vector==min(ic.vector))])
    
    

    if(probit.converge==1 & probit_product.converge==1){
      
      # probit model, roc selector
      if(probit.roc>=probit_product.roc){
        K<-which(model.name.vector=="probit")
        model.name.vector.t<-c(model.name.vector.t, model.name.vector[K])
        model.formula.vector.t<-c(model.formula.vector.t, model.formula.vector[K])
        model.link.vector.t<-c(model.link.vector.t, model.link.vector[K])      
      }else{
        K<-which(model.name.vector=="probit_product")
        model.name.vector.t<-c(model.name.vector.t, model.name.vector[K])
        model.formula.vector.t<-c(model.formula.vector.t, model.formula.vector[K])
        model.link.vector.t<-c(model.link.vector.t, model.link.vector[K])
      }
      
      # probit model, bic selector
      if(probit.ic[6]<=probit_product.ic[6]){
        K<-which(model.name.vector=="probit")
        model.name.vector.t<-c(model.name.vector.t, model.name.vector[K])
        model.formula.vector.t<-c(model.formula.vector.t, model.formula.vector[K])
        model.link.vector.t<-c(model.link.vector.t, model.link.vector[K])      
      }else{
        K<-which(model.name.vector=="probit_product")
        model.name.vector.t<-c(model.name.vector.t, model.name.vector[K])
        model.formula.vector.t<-c(model.formula.vector.t, model.formula.vector[K])
        model.link.vector.t<-c(model.link.vector.t, model.link.vector[K])
      }
      
      # probit model, aic selector
      if(probit.ic[5]<=probit_product.ic[5]){
        K<-which(model.name.vector=="probit")
        model.name.vector.t<-c(model.name.vector.t, model.name.vector[K])
        model.formula.vector.t<-c(model.formula.vector.t, model.formula.vector[K])
        model.link.vector.t<-c(model.link.vector.t, model.link.vector[K])      
      }else{
        K<-which(model.name.vector=="probit_product")
        model.name.vector.t<-c(model.name.vector.t, model.name.vector[K])
        model.formula.vector.t<-c(model.formula.vector.t, model.formula.vector[K])
        model.link.vector.t<-c(model.link.vector.t, model.link.vector[K])
      }
      
      # probit model, hl selector
      if(is.na(probit.hl)==T){probit.hl <- 1e10 + runif(1)}
      if(is.na(probit_product.hl)==T){probit_product.hl <- 1e10 + runif(1)}
      if(probit.hl<=probit_product.hl){
        K<-which(model.name.vector=="probit")
        model.name.vector.t<-c(model.name.vector.t, model.name.vector[K])
        model.formula.vector.t<-c(model.formula.vector.t, model.formula.vector[K])
        model.link.vector.t<-c(model.link.vector.t, model.link.vector[K])      
      }else{
        K<-which(model.name.vector=="probit_product")
        model.name.vector.t<-c(model.name.vector.t, model.name.vector[K])
        model.formula.vector.t<-c(model.formula.vector.t, model.formula.vector[K])
        model.link.vector.t<-c(model.link.vector.t, model.link.vector[K])
      }
      
      # probit model, significance selector
      sig<-1.96<=abs(probit_product.beta/sqrt(diag(probit_product.vcv)))[4,1]
      if(sig==F){
        K<-which(model.name.vector=="probit")
        model.name.vector.t<-c(model.name.vector.t, model.name.vector[K])
        model.formula.vector.t<-c(model.formula.vector.t, model.formula.vector[K])
        model.link.vector.t<-c(model.link.vector.t, model.link.vector[K])      
      }else{
        K<-which(model.name.vector=="probit_product")
        model.name.vector.t<-c(model.name.vector.t, model.name.vector[K])
        model.formula.vector.t<-c(model.formula.vector.t, model.formula.vector[K])
        model.link.vector.t<-c(model.link.vector.t, model.link.vector[K])
      }    

    }
    
    if(probit.converge==1 & probit_product.converge==0){
     
      K<-which(model.name.vector=="probit")
      model.name.vector.t<-c(model.name.vector.t, rep(model.name.vector[K], 5))
      model.formula.vector.t<-c(model.formula.vector.t, rep(model.formula.vector[K], 5))
      model.link.vector.t<-c(model.link.vector.t, rep(model.link.vector[K], 5))       
      
    }
    
    if(probit.converge==0 & probit_product.converge==1){
      
      K<-which(model.name.vector=="probit_product")
      model.name.vector.t<-c(model.name.vector.t, rep(model.name.vector[K], 5))
      model.formula.vector.t<-c(model.formula.vector.t, rep(model.formula.vector[K], 5))
      model.link.vector.t<-c(model.link.vector.t, rep(model.link.vector[K], 5))       
      
    }
    
    if(probit.converge==0 & probit_product.converge==0){
      
      model.name.vector.t<-c(model.name.vector.t, rep(NA, 5))
      model.formula.vector.t<-c(model.formula.vector.t, rep(NA, 5))
      model.link.vector.t<-c(model.link.vector.t, rep(NA, 5))             
      
    }
    
    

    if(logit.converge==1 & logit_product.converge==1){
      
      # logit model, roc selector
      if(logit.roc>=logit_product.roc){
        K<-which(model.name.vector=="logit")
        model.name.vector.t<-c(model.name.vector.t, model.name.vector[K])
        model.formula.vector.t<-c(model.formula.vector.t, model.formula.vector[K])
        model.link.vector.t<-c(model.link.vector.t, model.link.vector[K])      
      }else{
        K<-which(model.name.vector=="logit_product")
        model.name.vector.t<-c(model.name.vector.t, model.name.vector[K])
        model.formula.vector.t<-c(model.formula.vector.t, model.formula.vector[K])
        model.link.vector.t<-c(model.link.vector.t, model.link.vector[K])
      }
      
      
      # logit model, bic selector
      if(logit.ic[6]<=logit_product.ic[6]){
        K<-which(model.name.vector=="logit")
        model.name.vector.t<-c(model.name.vector.t, model.name.vector[K])
        model.formula.vector.t<-c(model.formula.vector.t, model.formula.vector[K])
        model.link.vector.t<-c(model.link.vector.t, model.link.vector[K])      
      }else{
        K<-which(model.name.vector=="logit_product")
        model.name.vector.t<-c(model.name.vector.t, model.name.vector[K])
        model.formula.vector.t<-c(model.formula.vector.t, model.formula.vector[K])
        model.link.vector.t<-c(model.link.vector.t, model.link.vector[K])
      }
  
      # logit model, aic selector 
      if(logit.ic[5]<=logit_product.ic[5]){
        K<-which(model.name.vector=="logit")
        model.name.vector.t<-c(model.name.vector.t, model.name.vector[K])
        model.formula.vector.t<-c(model.formula.vector.t, model.formula.vector[K])
        model.link.vector.t<-c(model.link.vector.t, model.link.vector[K])      
      }else{
        K<-which(model.name.vector=="logit_product")
        model.name.vector.t<-c(model.name.vector.t, model.name.vector[K])
        model.formula.vector.t<-c(model.formula.vector.t, model.formula.vector[K])
        model.link.vector.t<-c(model.link.vector.t, model.link.vector[K])
      }
  
  
      # logit model, hl selector
      if(is.na(logit.hl)==T){logit.hl <- 1e10 + runif(1)}
      if(is.na(logit_product.hl)==T){logit_product.hl <- 1e10 + runif(1)}
       if(logit.hl<=logit_product.hl){
        K<-which(model.name.vector=="logit")
        model.name.vector.t<-c(model.name.vector.t, model.name.vector[K])
        model.formula.vector.t<-c(model.formula.vector.t, model.formula.vector[K])
        model.link.vector.t<-c(model.link.vector.t, model.link.vector[K])      
      }else{
        K<-which(model.name.vector=="logit_product")
        model.name.vector.t<-c(model.name.vector.t, model.name.vector[K])
        model.formula.vector.t<-c(model.formula.vector.t, model.formula.vector[K])
        model.link.vector.t<-c(model.link.vector.t, model.link.vector[K])
      }
  
          
      # logit model, significance selector 
      sig<-1.96<=abs(logit_product.beta/sqrt(diag(logit_product.vcv)))[4,1]
      if(sig==F){
        K<-which(model.name.vector=="logit")
        model.name.vector.t<-c(model.name.vector.t, model.name.vector[K])
        model.formula.vector.t<-c(model.formula.vector.t, model.formula.vector[K])
        model.link.vector.t<-c(model.link.vector.t, model.link.vector[K])      
      }else{
        K<-which(model.name.vector=="logit_product")
        model.name.vector.t<-c(model.name.vector.t, model.name.vector[K])
        model.formula.vector.t<-c(model.formula.vector.t, model.formula.vector[K])
        model.link.vector.t<-c(model.link.vector.t, model.link.vector[K])
      }    
    
    }
    
    if(logit.converge==1 & logit_product.converge==0){
      
      K<-which(model.name.vector=="logit")
      model.name.vector.t<-c(model.name.vector.t, rep(model.name.vector[K], 5))
      model.formula.vector.t<-c(model.formula.vector.t, rep(model.formula.vector[K], 5))
      model.link.vector.t<-c(model.link.vector.t, rep(model.link.vector[K], 5))       
      
    }
    
    if(logit.converge==0 & logit_product.converge==1){
      
      K<-which(model.name.vector=="logit_product")
      model.name.vector.t<-c(model.name.vector.t, rep(model.name.vector[K], 5))
      model.formula.vector.t<-c(model.formula.vector.t, rep(model.formula.vector[K], 5))
      model.link.vector.t<-c(model.link.vector.t, rep(model.link.vector[K], 5))       
      
    }
    
    if(logit.converge==0 & logit_product.converge==0){
      
      model.name.vector.t<-c(model.name.vector.t, rep(NA, 5))
      model.formula.vector.t<-c(model.formula.vector.t, rep(NA, 5))
      model.link.vector.t<-c(model.link.vector.t, rep(NA, 5))       
      
    }
      
    
    
    
    # if this is the first iteration, create a place to store which model was selected
    if(i==1){selected.models<-matrix(data=NA, nrow=reps, ncol=length(model.name.vector.t)-length(model.name.vector))}
    selected.models[i,]<-model.name.vector.t[(length(model.name.vector)+1):length(model.name.vector.t)]
    
    
    
    # draw some observations from the asymptotic multivariate normal distribution of betas for each (non-selection-criterion) model
    for(j in 1:length(model.name.vector)){ 
      
      if(1==get(paste(model.name.vector[j], ".converge", sep=""))){
        VCV<-get(paste(model.name.vector[j],".vcv", sep=""))
        assign(paste(model.name.vector[j],".beta.draws", sep=""), value=rmvnorm(1000, mean=get(paste(model.name.vector[j],".beta", sep="")), sigma=VCV))
      }else{
        assign(paste(model.name.vector[j],".beta.draws", sep=""), value=matrix(data=NA, nrow=1000, ncol=length(get(paste(model.name.vector[j],".beta", sep="")))))
      }
    }
    
    
    
    # create result storage matrices
    if(i==1){
      
      # model convergence
      convergence.mat<-matrix(data=NA, ncol=length(model.name.vector), nrow=reps)
      
      # 84 first differences spread over the space
      fd.list<-list()
      fd.ci.list<-list()
      fd.ci.width.list<-list()
      
      # 4 central FDs, one for each model
      central.fd.list<-list()
      central.fd.ci.list<-list()
      central.fd.ci.width.list<-list()
      
      # 2 ME calculations for each model
      me.list<-list()
      me.ci.list<-list()
      me.ci.width.list<-list()
      
      # 4 ME difference calculations for each model
      me.dif.list<-list()
      me.dif.ci.list<-list()
      me.dif.ci.width.list<-list()
      
      # 2 second difference calculations for each model
      second.dif.list<-list()
      second.dif.ci.list<-list()
      second.dif.ci.width.list<-list()
      
      for(j in 1:length(model.name.vector.t)){
        
        fd.list[[j]]<-matrix(data=NA, nrow=reps, ncol=84, dimnames=NULL)  
        fd.ci.list[[j]]<-matrix(data=NA, nrow=reps, ncol=84, dimnames=NULL)
        fd.ci.width.list[[j]]<-matrix(data=NA, nrow=reps, ncol=84, dimnames=NULL)
        central.fd.list[[j]]<-matrix(data=NA, nrow=reps, ncol=4, dimnames=list(NULL, c("x1_1_0", "x1_0.75_0.25", "x2_1_0", "x2_0.75_0.25")))
        central.fd.ci.list[[j]]<-matrix(data=NA, nrow=reps, ncol=4, dimnames=list(NULL, c("x1_1_0", "x1_0.75_0.25", "x2_1_0", "x2_0.75_0.25")))
        central.fd.ci.width.list[[j]]<-matrix(data=NA, nrow=reps, ncol=4, dimnames=list(NULL, c("x1_1_0", "x1_0.75_0.25", "x2_1_0", "x2_0.75_0.25")))
        me.list[[j]]<-matrix(data=NA, nrow=reps, ncol=2, dimnames=list(NULL, c("x1_0.5_0.5", "x2_0.5_0.5")))
        me.ci.list[[j]]<-matrix(data=NA, nrow=reps, ncol=2, dimnames=list(NULL, c("x1_0.5_0.5", "x2_0.5_0.5")))
        me.ci.width.list[[j]]<-matrix(data=NA, nrow=reps, ncol=2, dimnames=list(NULL, c("x1_0.5_0.5", "x2_0.5_0.5")))
        me.dif.list[[j]]<-matrix(data=NA, nrow=reps, ncol=4, dimnames=list(NULL, c("x1_1_0", "x1_0.75_0.25", "x2_1_0", "x2_0.75_0.25")))
        me.dif.ci.list[[j]]<-matrix(data=NA, nrow=reps, ncol=4, dimnames=list(NULL, c("x1_1_0", "x1_0.75_0.25", "x2_1_0", "x2_0.75_0.25")))
        me.dif.ci.width.list[[j]]<-matrix(data=NA, nrow=reps, ncol=4, dimnames=list(NULL, c("x1_1_0", "x1_0.75_0.25", "x2_1_0", "x2_0.75_0.25")))
        second.dif.list[[j]]<-matrix(data=NA, nrow=reps, ncol=2, dimnames=list(NULL, c("x1_1_0","x1_0.75_0.25")))
        second.dif.ci.list[[j]]<-matrix(data=NA, nrow=reps, ncol=2, dimnames=list(NULL, c("x1_1_0","x1_0.75_0.25")))
        second.dif.ci.width.list[[j]]<-matrix(data=NA, nrow=reps, ncol=2, dimnames=list(NULL, c("x1_1_0","x1_0.75_0.25")))
        
      }
      colnames(convergence.mat)<-model.name.vector
      names(fd.list)<-model.name.vector.t
      names(fd.ci.list)<-model.name.vector.t
      names(fd.ci.width.list)<-model.name.vector.t
      names(central.fd.list)<-model.name.vector.t
      names(central.fd.ci.list)<-model.name.vector.t
      names(central.fd.ci.width.list)<-model.name.vector.t
      names(me.dif.list)<-model.name.vector.t
      names(me.dif.ci.list)<-model.name.vector.t
      names(me.dif.ci.width.list)<-model.name.vector.t
      names(second.dif.list)<-model.name.vector.t
      names(second.dif.ci.list)<-model.name.vector.t
      names(second.dif.ci.width.list)<-model.name.vector.t
    }
    
    # calculate performance statistics, but only for the non-conditional models
    for(j in 1:length(model.name.vector)){  
      
      convergence.mat[i,j]<-get(paste(model.name.vector[j], ".converge", sep=""))

      if(1!=get(paste(model.name.vector[j], ".converge", sep=""))){
        fd.list[[j]][i,]<-rep(NA, dim(fd.list[[j]])[2])
        central.fd.list[[j]][i,]<-rep(NA, dim(central.fd.list[[j]])[2])
        me.list[[j]][i,]<-rep(NA, dim(me.list[[j]])[2])
        me.dif.list[[j]][i,]<-rep(NA, dim(me.dif.list[[j]])[2])
        second.dif.list[[j]][i,]<-rep(NA, dim(second.dif.list[[j]])[2])
        fd.ci.list[[j]][i,]<-rep(NA, dim(fd.ci.list[[j]])[2])
        fd.ci.width.list[[j]][i,]<-rep(NA, dim(fd.ci.width.list[[j]])[2])
        central.fd.ci.list[[j]][i,]<-rep(NA, dim(central.fd.ci.list[[j]])[2])
        central.fd.ci.width.list[[j]][i,]<-rep(NA, dim(central.fd.ci.width.list[[j]])[2])
        me.ci.list[[j]][i,]<-rep(NA, dim(me.ci.list[[j]])[2])
        me.ci.width.list[[j]][i,]<-rep(NA, dim(me.ci.width.list[[j]])[2])
        me.dif.ci.list[[j]][i,]<-rep(NA, dim(me.dif.ci.list[[j]])[2])
        me.dif.ci.width.list[[j]][i,]<-rep(NA, dim(me.dif.ci.width.list[[j]])[2])
        second.dif.ci.list[[j]][i,]<-rep(NA, dim(second.dif.ci.list[[j]])[2])
        second.dif.ci.width.list[[j]][i,]<-rep(NA, dim(second.dif.ci.width.list[[j]])[2])
        
      }else{
      
        # global first differences
        fd.list[[j]][i,]<-fd(get(paste(model.name.vector[j],".beta", sep="")), model.formula.vector[[j]], model.link.vector[j])
        
        # central first differences
        central.fd.list[[j]][i,]<-central.fd(get(paste(model.name.vector[j],".beta", sep="")), model.formula.vector[[j]], model.link.vector[j])
        
        # central marginal effects
        me.list[[j]][i,]<-c(me(cbind(0.5,0.5), dir="x1", beta=get(paste(model.name.vector[j],".beta", sep="")), form=model.formula.vector[[j]], link.name=model.link.vector[j]), me(cbind(0.5,0.5), dir="x2", beta=get(paste(model.name.vector[j],".beta", sep="")), form=model.formula.vector[[j]], link.name=model.link.vector[j]))
        
        # marginal effect differences
        me.dif.list[[j]][i,]<-me.dif(get(paste(model.name.vector[j],".beta", sep="")), model.formula.vector[[j]], model.link.vector[j])
        
        # second differences
        second.dif.list[[j]][i,]<-second.dif(get(paste(model.name.vector[j],".beta", sep="")), model.formula.vector[[j]], model.link.vector[j])
        
        
        ####
        # calculate 95% CI of FD and ME and ME Difs
        ####
        
        # storage for simulated first differences
        fd.draws<-matrix(data=NA, nrow=1000, ncol=84)
        central.fd.draws<-matrix(data=NA, nrow=1000, ncol=4)
        me.draws<-matrix(data=NA, nrow=1000, ncol=2)
        me.dif.draws<-matrix(data=NA, nrow=1000, ncol=4)
        second.dif.draws<-matrix(data=NA, nrow=1000, ncol=2)
        
        
        # get the simulated beta
        b.t<-get(paste(model.name.vector[j],".beta.draws", sep=""))
        
        # draw a bunch of stuff using parallel computing
        fd.draws<-t(parApply(cl, X=b.t, FUN=fd, MARGIN=1, form=model.formula.vector[[j]], link.name=model.link.vector[j]))
        central.fd.draws<-t(parApply(cl, X=b.t, FUN=central.fd, MARGIN=1, form=model.formula.vector[[j]], link.name=model.link.vector[j]))
        me.draws<-t(parApply(cl, X=b.t, FUN=me.shell.fun, MARGIN=1, form=model.formula.vector[[j]], link.name=model.link.vector[j]))
        me.dif.draws<-t(parApply(cl, X=b.t, FUN=me.dif, MARGIN=1, form=model.formula.vector[[j]], link.name=model.link.vector[j]))
        second.dif.draws<-t(parApply(cl, X=b.t, FUN=second.dif, MARGIN=1, form=model.formula.vector[[j]], link.name=model.link.vector[j]))
        
        # get the true first difference/MEs
        fd.observed<-fd.true(r)
        central.fd.observed<-central.fd.true(r)
        me.observed<-c(me.true(cbind(0.5,0.5), r, "x1"), me.true(cbind(0.5,0.5), r, "x2"))
        me.dif.observed<-me.dif.true(r)
        second.dif.observed<-second.dif.true(r)
        
        # is the true FD/ME inside the 95% CI for the simulated values?
        fd.ci<-c()
        central.fd.ci<-c()
        me.ci<-c()
        me.dif.ci<-c()
        second.dif.ci<-c()
        fd.ci.width<-c()
        central.fd.ci.width<-c()
        me.ci.width<-c()
        me.dif.ci.width<-c()
        second.dif.ci.width<-c()
        for(k in 1:84){
          fd.ci[k]<-(ecdf(fd.draws[,k])(fd.observed[k])>=0.025 & ecdf(fd.draws[,k])(fd.observed[k])<=0.975)
          fd.ci.width[k]<-quantile(fd.draws[,k], probs=0.975)-quantile(fd.draws[,k], probs=0.025)
        }
        for(k in 1:4){
          central.fd.ci[k]<-(ecdf(central.fd.draws[,k])(central.fd.observed[k])>=0.025 & ecdf(central.fd.draws[,k])(central.fd.observed[k])<=0.975)
          central.fd.ci.width[k]<-quantile(central.fd.draws[,k], probs=0.975)-quantile(central.fd.draws[,k], probs=0.025)
        }
        for(k in 1:2){
          me.ci[k]<-(ecdf(me.draws[,k])(me.observed[k])>=0.025 & ecdf(me.draws[,k])(me.observed[k])<=0.975)
          me.ci.width[k]<-quantile(me.draws[,k], probs=0.975)-quantile(me.draws[,k], probs=0.025)
        }
        for(k in 1:4){
          me.dif.ci[k]<-(ecdf(me.dif.draws[,k])(me.dif.observed[k])>=0.025 & ecdf(me.dif.draws[,k])(me.dif.observed[k])<=0.975)
          me.dif.ci.width[k]<-quantile(me.dif.draws[,k], probs=0.975)-quantile(me.dif.draws[,k], probs=0.025)
        }
        for(k in 1:2){
          second.dif.ci[k]<-(ecdf(second.dif.draws[,k])(second.dif.observed[k])>=0.025 & ecdf(second.dif.draws[,k])(second.dif.observed[k])<=0.975)
          second.dif.ci.width[k]<-quantile(second.dif.draws[,k], probs=0.975)-quantile(second.dif.draws[,k], probs=0.025)
        }
        
        fd.ci.list[[j]][i,]<-fd.ci
        central.fd.ci.list[[j]][i,]<-central.fd.ci
        me.ci.list[[j]][i,]<-me.ci
        me.dif.ci.list[[j]][i,]<-me.dif.ci
        second.dif.ci.list[[j]][i,]<-second.dif.ci

        fd.ci.width.list[[j]][i,]<-fd.ci.width
        central.fd.ci.width.list[[j]][i,]<-central.fd.ci.width
        me.ci.width.list[[j]][i,]<-me.ci.width
        me.dif.ci.width.list[[j]][i,]<-me.dif.ci.width
        second.dif.ci.width.list[[j]][i,]<-second.dif.ci.width
        
      }
      
    }
    
  # now, fill in the appropriate values for all the selection models
  for(j in (length(model.name.vector)+1):(length(model.name.vector.t))){
    
    
    if(is.na(model.name.vector.t[j])){

        fd.list[[j]][i,]<-rep(NA, dim(fd.list[[j]])[2])
        central.fd.list[[j]][i,]<-rep(NA, dim(central.fd.list[[j]])[2])
        me.list[[j]][i,]<-rep(NA, dim(me.list[[j]])[2])
        me.dif.list[[j]][i,]<-rep(NA, dim(me.dif.list[[j]])[2])
        second.dif.list[[j]][i,]<-rep(NA, dim(second.dif.list[[j]])[2])
        fd.ci.list[[j]][i,]<-rep(NA, dim(fd.ci.list[[j]])[2])
        central.fd.ci.list[[j]][i,]<-rep(NA, dim(central.fd.ci.list[[j]])[2])
        me.ci.list[[j]][i,]<-rep(NA, dim(me.ci.list[[j]])[2])
        me.dif.ci.list[[j]][i,]<-rep(NA, dim(me.dif.ci.list[[j]])[2])
        second.dif.ci.list[[j]][i,]<-rep(NA, dim(second.dif.ci.list[[j]])[2])
        fd.ci.width.list[[j]][i,]<-rep(NA, dim(fd.ci.width.list[[j]])[2])
        central.fd.ci.width.list[[j]][i,]<-rep(NA, dim(central.fd.ci.width.list[[j]])[2])
        me.ci.width.list[[j]][i,]<-rep(NA, dim(me.ci.width.list[[j]])[2])
        me.dif.ci.width.list[[j]][i,]<-rep(NA, dim(me.dif.ci.width.list[[j]])[2])
        second.dif.ci.width.list[[j]][i,]<-rep(NA, dim(second.dif.ci.width.list[[j]])[2])
        
        
      }else{      
      
        k=which(model.name.vector.t[j]==model.name.vector)
        
        fd.list[[j]][i,]<-fd.list[[k]][i,]
        central.fd.list[[j]][i,]<-central.fd.list[[k]][i,]
        me.list[[j]][i,]<-me.list[[k]][i,]   
        me.dif.list[[j]][i,]<-me.dif.list[[k]][i,]
        second.dif.list[[j]][i,]<-second.dif.list[[k]][i,]
        
        fd.ci.list[[j]][i,]<-fd.ci.list[[k]][i,]
        central.fd.ci.list[[j]][i,]<-central.fd.ci.list[[k]][i,]
        me.ci.list[[j]][i,]<-me.ci.list[[k]][i,]
        me.dif.ci.list[[j]][i,]<-me.dif.ci.list[[k]][i,]
        second.dif.ci.list[[j]][i,]<-second.dif.ci.list[[k]][i,]
        
        fd.ci.width.list[[j]][i,]<-fd.ci.width.list[[k]][i,]
        central.fd.ci.width.list[[j]][i,]<-central.fd.ci.width.list[[k]][i,]
        me.ci.width.list[[j]][i,]<-me.ci.width.list[[k]][i,]
        me.dif.ci.width.list[[j]][i,]<-me.dif.ci.width.list[[k]][i,]
        second.dif.ci.width.list[[j]][i,]<-second.dif.ci.width.list[[k]][i,]
        
    
      }
  }

  # now set the link, name, and formula vectors to be their appended version
  model.name.vector<-model.name.vector.t
  model.link.vector<-model.link.vector.t
  model.formula.vector<-model.formula.vector.t
    
  }
  close(pb)
  
  
  ####################
  # store all the measurements for each DGP
  ####################
  
  # reset the model name vector to accurately reflect the automatic selection procedures
  selection.model.names<-c("roc", "hl", "bic", "aic", "roc-simp", "hl-simp", "bic-simp", "aic-simp", "probit_roc", "probit_bic", "probit_aic", "probit_hl", "probit_sig", "logit_roc", "logit_bic",  "logit_aic", "logit_hl", "logit_sig")
  model.name.vector[(length(model.name.vector)-length(selection.model.names)+1):length(model.name.vector)]<-selection.model.names
  
  # if this is the first DGP, create the storage matrices for the overall run
  if(r==1){
    
    # convergence storage
    convergence.report.mat<-matrix(data=NA, nrow=155, ncol=dim(convergence.mat)[2])
    colnames(convergence.report.mat)<-model.name.vector[1:(length(model.name.vector)-length(selection.model.names))]

    # RMSE Storage
    fd.rmse<-array(data=NA, dim=c(155, 84, length(model.formula.vector)), dimnames=list(NULL, NULL, model.name.vector)) 
    central.fd.rmse<-array(data=NA, dim=c(155, 4, length(model.formula.vector)), dimnames=list(NULL, c("x1_1_0", "x1_0.75_0.25", "x2_1_0", "x2_0.75_0.25"), model.name.vector))
    me.rmse<-array(data=NA, dim=c(155, 2, length(model.formula.vector)), dimnames=list(NULL, c("x1_0.5_0.5", "x2_0.5_0.5"), model.name.vector))
    me.dif.rmse<-array(data=NA, dim=c(155, 4, length(model.formula.vector)), dimnames=list(NULL, c("x1_1_0", "x1_0.75_0.25", "x2_1_0", "x2_0.75_0.25"), model.name.vector))
    second.dif.rmse<-array(data=NA, dim=c(155, 2, length(model.formula.vector)), dimnames=list(NULL, c("x1_1_0","x1_0.75_0.25"), model.name.vector))
    
    # CI storage
    fd.ci.count<-array(data=NA, dim=c(155, 84, length(model.formula.vector)))
    central.fd.ci.count<-array(data=NA, dim=c(155, 4, length(model.formula.vector)))
    me.ci.count<-array(data=NA, dim=c(155, 2, length(model.formula.vector)))
    me.dif.ci.count<-array(data=NA, dim=c(155, 4, length(model.formula.vector)))
    second.dif.ci.count<-array(data=NA, dim=c(155, 2, length(model.formula.vector)))
    
    # CI width storage
    fd.ci.width.mean<-array(data=NA, dim=c(155, 84, length(model.formula.vector)))
    central.fd.ci.width.mean<-array(data=NA, dim=c(155, 4, length(model.formula.vector)))
    me.ci.width.mean<-array(data=NA, dim=c(155, 2, length(model.formula.vector)))
    me.dif.ci.width.mean<-array(data=NA, dim=c(155, 4, length(model.formula.vector)))
    second.dif.ci.width.mean<-array(data=NA, dim=c(155, 2, length(model.formula.vector)))
    
    
    # bias storage
    fd.bias<-array(data=NA, dim=c(155, 84, length(model.formula.vector)))
    central.fd.bias<-array(data=NA, dim=c(155, 4, length(model.formula.vector)))
    me.bias<-array(data=NA, dim=c(155, 2, length(model.formula.vector)))
    me.dif.bias<-array(data=NA, dim=c(155, 4, length(model.formula.vector)))
    second.dif.bias<-array(data=NA, dim=c(155, 2, length(model.formula.vector)))

    # absolute bias storage
    fd.abs.bias<-array(data=NA, dim=c(155, 84, length(model.formula.vector)))
    central.fd.abs.bias<-array(data=NA, dim=c(155, 4, length(model.formula.vector)))
    me.abs.bias<-array(data=NA, dim=c(155, 2, length(model.formula.vector)))
    me.dif.abs.bias<-array(data=NA, dim=c(155, 4, length(model.formula.vector)))
    second.dif.abs.bias<-array(data=NA, dim=c(155, 2, length(model.formula.vector)))
    
    # error SD storage
    fd.error.sd<-array(data=NA, dim=c(155, 84, length(model.formula.vector)))
    central.fd.error.sd<-array(data=NA, dim=c(155, 4, length(model.formula.vector)))
    me.error.sd<-array(data=NA, dim=c(155, 2, length(model.formula.vector)))
    me.dif.error.sd<-array(data=NA, dim=c(155, 4, length(model.formula.vector)))
    second.dif.error.sd<-array(data=NA, dim=c(155, 2, length(model.formula.vector)))
    
    # automatic selection criteria storage
    selected.models.store<-array(data=NA, dim=c(155,reps,dim(selected.models)[2]), dimnames=list(NULL, NULL, selection.model.names))
    
    # which model, between product and no product, was closer to the second difference?
    second.dif.which.closer<-matrix(data=NA, nrow=155, ncol=7)
    
  }
  
  # store the convergence status of models
  convergence.report.mat[r,]<-colSums(convergence.mat)
  
  
  # store which models were selected for each of the automated criteria
  selected.models.store[r,,]<-selected.models
  
  # RMSEs for all measurements
  sqerror<-function(result, true){
    return((result-true)^2)
  }
  
  true.temp<-matrix(data=fd.true(r), byrow=T, nrow=reps, ncol=length(fd.true(r)))
  temp<-lapply(X=fd.list, FUN=sqerror, true=true.temp)
  fd.rmse[r,,]<-sqrt(sapply(X=temp, FUN=colMeans, na.rm=T))
  
  true.temp<-matrix(data=central.fd.true(r), byrow=T, nrow=reps, ncol=length(central.fd.true(r)))
  temp<-lapply(X=central.fd.list, FUN=sqerror, true=true.temp)
  central.fd.rmse[r,,]<-sqrt(sapply(X=temp, FUN=colMeans, na.rm=T))
  
  me.true.temp.store<-c(me.true(cbind(0.5,0.5), r=r, dir="x1"), me.true(cbind(0.5,0.5), r=r, dir="x2"))
  true.temp<-matrix(data=me.true.temp.store, byrow=T, nrow=reps, ncol=length(me.true.temp.store))
  temp<-lapply(X=me.list, FUN=sqerror, true=true.temp)
  me.rmse[r,,]<-sqrt(sapply(X=temp, FUN=colMeans, na.rm=T))
  
  true.temp<-matrix(data=me.dif.true(r), byrow=T, nrow=reps, ncol=length(me.dif.true(r)))
  temp<-lapply(X=me.dif.list, FUN=sqerror, true=true.temp)
  me.dif.rmse[r,,]<-sqrt(sapply(X=temp, FUN=colMeans, na.rm=T))
  
  true.temp<-matrix(data=second.dif.true(r), byrow=T, nrow=reps, ncol=length(second.dif.true(r)))
  temp<-lapply(X=second.dif.list, FUN=sqerror, true=true.temp)
  second.dif.rmse[r,,]<-sqrt(sapply(X=temp, FUN=colMeans, na.rm=T))
  
  
  # CI storage for all measurements
  
  for(j in 1:length(model.name.vector)){
    fd.ci.count[r,,j]<-apply(X=fd.ci.list[[j]], MARGIN=2, FUN=sum, na.rm=T)
    central.fd.ci.count[r,,j]<-apply(X=central.fd.ci.list[[j]], MARGIN=2, FUN=sum, na.rm=T)
    me.ci.count[r,,j]<-apply(X=me.ci.list[[j]], MARGIN=2, FUN=sum, na.rm=T)
    me.dif.ci.count[r,,j]<-apply(X=me.dif.ci.list[[j]], MARGIN=2, FUN=sum, na.rm=T)
    second.dif.ci.count[r,,j]<-apply(X=second.dif.ci.list[[j]], MARGIN=2, FUN=sum, na.rm=T)
    
    fd.ci.width.mean[r,,j]<-apply(X=fd.ci.width.list[[j]], MARGIN=2, FUN=mean, na.rm=T)
    central.fd.ci.width.mean[r,,j]<-apply(X=central.fd.ci.width.list[[j]], MARGIN=2, FUN=mean, na.rm=T)
    me.ci.width.mean[r,,j]<-apply(X=me.ci.width.list[[j]], MARGIN=2, FUN=mean, na.rm=T)
    me.dif.ci.width.mean[r,,j]<-apply(X=me.dif.ci.width.list[[j]], MARGIN=2, FUN=mean, na.rm=T)
    second.dif.ci.width.mean[r,,j]<-apply(X=second.dif.ci.width.list[[j]], MARGIN=2, FUN=mean, na.rm=T)
    
    
        
  }# note: NA removal means that non-converged models will count as "misses" for CI storage purposes
  
  # Bias and SD storage for all measurements
  bias<-function(measured, true){
    return(na.omit(measured-true))
  }
  
  colSDs<-function(mat){
    return(apply(X=mat, FUN=sd, MARGIN=2, na.rm=T))
  }
  
  true.temp<-matrix(data=fd.true(r), byrow=T, nrow=reps, ncol=length(fd.true(r)))
  temp<-lapply(FUN=bias, X=fd.list, true=true.temp)
  fd.bias[r,,]<-sapply(FUN=colMeans, X=temp, na.rm=T)
  temp.abs<-lapply(FUN=abs, X=temp)
  fd.abs.bias[r,,]<-sapply(FUN=colMeans, X=temp.abs, na.rm=T)
  fd.error.sd[r,,]<-sapply(FUN=colSDs, X=temp)
  
  true.temp<-matrix(data=central.fd.true(r), byrow=T, nrow=reps, ncol=length(central.fd.true(r)))
  temp<-lapply(FUN=bias, X=central.fd.list, true=true.temp)
  central.fd.bias[r,,]<-sapply(FUN=colMeans, X=temp, na.rm=T)
  temp.abs<-lapply(FUN=abs, X=temp)
  central.fd.abs.bias[r,,]<-sapply(FUN=colMeans, X=temp.abs, na.rm=T)
  central.fd.error.sd[r,,]<-sapply(FUN=colSDs, X=temp)
  
  true.temp<-matrix(data=me.true.temp.store, byrow=T, nrow=reps, ncol=length(me.true.temp.store))
  temp<-lapply(FUN=bias, X=me.list, true=true.temp)
  me.bias[r,,]<-sapply(FUN=colMeans, X=temp, na.rm=T)
  temp.abs<-lapply(FUN=abs, X=temp)
  me.abs.bias[r,,]<-sapply(FUN=colMeans, X=temp.abs, na.rm=T)
  me.error.sd[r,,]<-sapply(FUN=colSDs, X=temp)
  
  true.temp<-matrix(data=me.dif.true(r), byrow=T, nrow=reps, ncol=length(me.dif.true(r)))
  temp<-lapply(FUN=bias, X=me.dif.list, true=true.temp)
  me.dif.bias[r,,]<-sapply(FUN=colMeans, X=temp, na.rm=T)
  temp.abs<-lapply(FUN=abs, X=temp)
  me.dif.abs.bias[r,,]<-sapply(FUN=colMeans, X=temp.abs, na.rm=T)
  me.dif.error.sd[r,,]<-sapply(FUN=colSDs, X=temp)
  
  true.temp<-matrix(data=second.dif.true(r), byrow=T, nrow=reps, ncol=length(second.dif.true(r)))
  temp<-lapply(FUN=bias, X=second.dif.list, true=true.temp)
  second.dif.bias[r,,]<-sapply(FUN=colMeans, X=temp, na.rm=T)
  temp.abs<-lapply(FUN=abs, X=temp)
  second.dif.abs.bias[r,,]<-sapply(FUN=colMeans, X=temp.abs, na.rm=T)
  second.dif.error.sd[r,,]<-sapply(FUN=colSDs, X=temp)
  
  # which of the simple estimators got closer for the second difference?
  bias.na<-function(measured, true){
    return(measured-true)
  }
  true.temp<-matrix(data=second.dif.true(r), byrow=T, nrow=reps, ncol=length(second.dif.true(r)))
  temp<-lapply(FUN=bias.na, X=second.dif.list, true=true.temp)
  temp.abs<-lapply(FUN=abs, X=temp)
  
  second.dif.which.closer[r,1]<-sum(1==apply(X=cbind(temp.abs[[1]][,1], temp.abs[[2]][,1]), FUN=which.min, MARGIN=1), na.rm=T)/(reps-sum(is.na(1==apply(X=cbind(temp.abs[[1]][,1], temp.abs[[2]][,1]), FUN=which.min, MARGIN=1))))
  
  second.dif.which.closer[r,2]<-sum(1==apply(X=cbind(temp.abs[[3]][,1], temp.abs[[4]][,1]), FUN=which.min, MARGIN=1), na.rm=T)/(reps-sum(is.na(1==apply(X=cbind(temp.abs[[3]][,1], temp.abs[[4]][,1]), FUN=which.min, MARGIN=1))))
  
  second.dif.which.closer[r,3]<-sum(1==apply(X=cbind(temp.abs[[5]][,1], temp.abs[[6]][,1]), FUN=which.min, MARGIN=1), na.rm=T)/(reps-sum(is.na(1==apply(X=cbind(temp.abs[[5]][,1], temp.abs[[6]][,1]), FUN=which.min, MARGIN=1))))
  
  second.dif.which.closer[r,4]<-sum(1==apply(X=cbind(temp.abs[[7]][,1], temp.abs[[8]][,1]), FUN=which.min, MARGIN=1), na.rm=T)/(reps-sum(is.na(1==apply(X=cbind(temp.abs[[7]][,1], temp.abs[[8]][,1]), FUN=which.min, MARGIN=1))))
  
  second.dif.which.closer[r,5]<-sum(1==apply(X=cbind(temp.abs[[9]][,1], temp.abs[[10]][,1]), FUN=which.min, MARGIN=1), na.rm=T)/(reps-sum(is.na(1==apply(X=cbind(temp.abs[[9]][,1], temp.abs[[10]][,1]), FUN=which.min, MARGIN=1))))
  
  second.dif.which.closer[r,6]<-sum(1==apply(X=cbind(temp.abs[[11]][,1], temp.abs[[12]][,1]), FUN=which.min, MARGIN=1), na.rm=T)/(reps-sum(is.na(1==apply(X=cbind(temp.abs[[11]][,1], temp.abs[[12]][,1]), FUN=which.min, MARGIN=1))))
  
  second.dif.which.closer[r,7]<-sum(1==apply(X=cbind(temp.abs[[13]][,1], temp.abs[[14]][,1]), FUN=which.min, MARGIN=1), na.rm=T)/(reps-sum(is.na(1==apply(X=cbind(temp.abs[[13]][,1], temp.abs[[14]][,1]), FUN=which.min, MARGIN=1))))

  
}
















# Post-processing and analysis

# reference indexes for each type of DGP
continuous.additive<-1:49
min.max.secdif.10<-50:61
min.max.secdif.20<-62:73
min.max.secdif.30<-74:91
min.max.secdif.40<-92:103
min.max.secdif.50<-104:115

linear.additive<-1:24
linear.secdif.10<-50:53
linear.secdif.20<-62:65
linear.secdif.30<-74:79
linear.secdif.40<-92:103
linear.secdif.50<-104:107

nonlinear.additive<-25:49
nonlinear.secdif.10<-54:61
nonlinear.secdif.20<-66:73
nonlinear.secdif.30<-80:91
nonlinear.secdif.40<-96:103
nonlinear.secdif.50<-108:115

additive.null<-1:9
additive.x.null<-10:14
additive.nonull<-15:24
nonlinear.x.null<-25:39
nonlinear.nonull<-40:49
additive.x.null.two<-c(10:14,25:39)
additive.nonull.two<-c(15:24,40:49)

analysis.titles<-c("additive", "min-max second difference = .10", "min-max second difference = .20", "min-max second difference = .30", "min-max second difference = .40", "min-max second difference = .50", "linear additive", "linear min-max second difference = .10", "linear min-max second difference = .20", "linear min-max second difference = .30", "linear min-max second difference = .40", "linear min-max second difference = .50", "nonlinear additive", "nonlinear min-max second difference = .10", "nonlinear min-max second difference = .20", "nonlinear min-max second difference = .30", "nonlinear min-max second difference = .40", "nonlinear min-max second difference = .50", "additive null", "additive x null z not", "additive no null", "nonlinear x null z not", "nonlinear no null", "additive x null z not expanded", "additive no null expanded")

analysis.indices<-list(continuous.additive, min.max.secdif.10, min.max.secdif.20, min.max.secdif.30, min.max.secdif.40, min.max.secdif.50, linear.additive, linear.secdif.10, linear.secdif.20, linear.secdif.30, linear.secdif.40, linear.secdif.50, nonlinear.additive, nonlinear.secdif.10, nonlinear.secdif.20, nonlinear.secdif.30, nonlinear.secdif.40, nonlinear.secdif.50, additive.null, additive.x.null, additive.nonull, nonlinear.x.null, nonlinear.nonull, additive.x.null.two, additive.nonull.two)

names(analysis.indices)<-analysis.titles


# now print out all the analysis!

# First differences
x1.1a<-c(rep(1, 21), rep(.75, 21), seq(from=0,to=1,by=0.05), seq(from=0,to=1,by=0.05))
x2.1a<-c(seq(from=0,to=1,by=0.05), seq(from=0,to=1,by=0.05), rep(1, 21), rep(.75, 21))
x1.2a<-c(rep(0, 21), rep(.25, 21), seq(from=0,to=1,by=0.05), seq(from=0,to=1,by=0.05))
x2.2a<-c(seq(from=0,to=1,by=0.05), seq(from=0,to=1,by=0.05), rep(0, 21), rep(.25, 21))

fd.mat<-cbind(x1.1a,x2.1a,NA,x1.2a,x2.2a)
for(i in 1:length(analysis.titles)){
  
  name.one<-model.name.vector
  name.two<-c("RMSE", "bias", "abs-bias", "StDev", "%-true-in-CI", "CI-width")
  result.labels<-as.vector(outer(name.one, name.two, paste, sep="_"))
  result.labels<-paste(analysis.titles[i], result.labels)
  fd.mat.temp<-cbind(apply(X=fd.rmse[analysis.indices[[i]],,], MARGIN=3, FUN=colMeans), apply(X=fd.bias[analysis.indices[[i]],,], MARGIN=3, FUN=colMeans), apply(X=fd.abs.bias[analysis.indices[[i]],,], MARGIN=3, FUN=colMeans), apply(X=fd.error.sd[analysis.indices[[i]],,], MARGIN=3, FUN=colMeans), apply(X=fd.ci.count[analysis.indices[[i]],,], MARGIN=3, FUN=colMeans)/reps, apply(X=fd.ci.width.mean[analysis.indices[[i]],,], MARGIN=3, FUN=colMeans))
  colnames(fd.mat.temp)<-result.labels
  fd.mat<-cbind(fd.mat, fd.mat.temp)
  
}

write.csv(file="first_difference_results.csv", t(fd.mat))




# Central first differences
central.fd.mat<-matrix(data=NA, ncol=0, nrow=dim(central.fd.bias)[2])
for(i in 1:length(analysis.titles)){
  
  name.one<-model.name.vector
  name.two<-c("RMSE", "bias", "abs-bias", "StDev", "%-true-in-CI", "CI-width")
  result.labels<-as.vector(outer(name.one, name.two, paste, sep="_"))
  result.labels<-paste(analysis.titles[i], result.labels)
  central.fd.mat.temp<-cbind(apply(X=central.fd.rmse[analysis.indices[[i]],,], MARGIN=3, FUN=colMeans), apply(X=central.fd.bias[analysis.indices[[i]],,], MARGIN=3, FUN=colMeans), apply(X=central.fd.abs.bias[analysis.indices[[i]],,], MARGIN=3, FUN=colMeans), apply(X=central.fd.error.sd[analysis.indices[[i]],,], MARGIN=3, FUN=colMeans), apply(X=central.fd.ci.count[analysis.indices[[i]],,], MARGIN=3, FUN=colMeans)/reps, apply(X=central.fd.ci.width.mean[analysis.indices[[i]],,], MARGIN=3, FUN=colMeans))
  colnames(central.fd.mat.temp)<-result.labels
  central.fd.mat<-cbind(central.fd.mat, central.fd.mat.temp)
  
}
write.csv(file="central_fd_results.csv", t(central.fd.mat))



# Marginal effects
me.mat<-matrix(data=NA, ncol=0, nrow=dim(me.bias)[2])
for(i in 1:length(analysis.titles)){
  
  name.one<-model.name.vector
  name.two<-c("RMSE", "bias", "abs-bias", "StDev", "%-true-in-CI", "CI-width")
  result.labels<-as.vector(outer(name.one, name.two, paste, sep="_"))
  result.labels<-paste(analysis.titles[i], result.labels)
  me.mat.temp<-cbind(apply(X=me.rmse[analysis.indices[[i]],,], MARGIN=3, FUN=colMeans), apply(X=me.bias[analysis.indices[[i]],,], MARGIN=3, FUN=colMeans), apply(X=me.abs.bias[analysis.indices[[i]],,], MARGIN=3, FUN=colMeans), apply(X=me.error.sd[analysis.indices[[i]],,], MARGIN=3, FUN=colMeans), apply(X=me.ci.count[analysis.indices[[i]],,], MARGIN=3, FUN=colMeans)/reps, apply(X=me.ci.width.mean[analysis.indices[[i]],,], MARGIN=3, FUN=colMeans))
  colnames(me.mat.temp)<-result.labels
  me.mat<-cbind(me.mat, me.mat.temp)
  
}
write.csv(file="me_results.csv", t(me.mat))




# Marginal effects differences
me.dif.mat<-matrix(data=NA, ncol=0, nrow=dim(me.dif.bias)[2])
for(i in 1:length(analysis.titles)){
  
  name.one<-model.name.vector
  name.two<-c("RMSE", "bias", "abs-bias", "StDev", "%-true-in-CI", "CI-width")
  result.labels<-as.vector(outer(name.one, name.two, paste, sep="_"))
  result.labels<-paste(analysis.titles[i], result.labels)
  me.dif.mat.temp<-cbind(apply(X=me.dif.rmse[analysis.indices[[i]],,], MARGIN=3, FUN=colMeans), apply(X=me.dif.bias[analysis.indices[[i]],,], MARGIN=3, FUN=colMeans), apply(X=me.dif.abs.bias[analysis.indices[[i]],,], MARGIN=3, FUN=colMeans), apply(X=me.dif.error.sd[analysis.indices[[i]],,], MARGIN=3, FUN=colMeans), apply(X=me.dif.ci.count[analysis.indices[[i]],,], MARGIN=3, FUN=colMeans)/reps, apply(X=me.dif.ci.width.mean[analysis.indices[[i]],,], MARGIN=3, FUN=colMeans))
  colnames(me.dif.mat.temp)<-result.labels
  me.dif.mat<-cbind(me.dif.mat, me.dif.mat.temp)
  
}
write.csv(file="me_dif_results.csv", t(me.dif.mat))





# Second differences
second.dif.mat<-matrix(data=NA, ncol=0, nrow=dim(second.dif.bias)[2])
for(i in 1:length(analysis.titles)){
  
  name.one<-model.name.vector
  name.two<-c("RMSE", "bias", "abs-bias", "StDev", "%-true-in-CI", "CI-width")
  result.labels<-as.vector(outer(name.one, name.two, paste, sep="_"))
  result.labels<-paste(analysis.titles[i], result.labels)
  second.dif.mat.temp<-cbind(apply(X=second.dif.rmse[analysis.indices[[i]],,], MARGIN=3, FUN=colMeans), apply(X=second.dif.bias[analysis.indices[[i]],,], MARGIN=3, FUN=colMeans), apply(X=second.dif.abs.bias[analysis.indices[[i]],,], MARGIN=3, FUN=colMeans), apply(X=second.dif.error.sd[analysis.indices[[i]],,], MARGIN=3, FUN=colMeans), apply(X=second.dif.ci.count[analysis.indices[[i]],,], MARGIN=3, FUN=colMeans)/reps, apply(X=second.dif.ci.width.mean[analysis.indices[[i]],,], MARGIN=3, FUN=colMeans))
  colnames(second.dif.mat.temp)<-result.labels
  rownames(second.dif.mat.temp)<-c("2nddif_1_0", "2nddif_0.75_0.25")
  second.dif.mat<-cbind(second.dif.mat, second.dif.mat.temp)
  
}
write.csv(file="second_difference_results.csv", t(second.dif.mat))






# how often was each model selected?
selection.counts<-array(data=NA, dim=c(155,length(model.name.vector.o),length(selection.model.names)), dimnames=c(NULL, model.name.vector.o, selection.model.names))
for(k in 1:length(selection.model.names)){
  
  for(i in 1:155){
    
    for(j in 1:length(model.name.vector.o)){
      
      selection.counts[i,j,k]<-length(which(model.name.vector.o[j]==selected.models.store[i,,k]))
      
    }
    
  }
  
}


for(i in 1:length(analysis.indices)){
	selection.counts.temp<-matrix(data=0, nrow=length(model.name.vector.o), ncol=length(selection.model.names))
	for(m in 1:length(analysis.indices[[i]])){
		selection.counts.temp<-selection.counts.temp+selection.counts[analysis.indices[[i]][m],,]
	}
	colnames(selection.counts.temp)<-selection.model.names
	rownames(selection.counts.temp)<-model.name.vector.o
	write.csv(file=paste("z_",analysis.titles[i],"_selected_models.csv", sep=""), selection.counts.temp)  
}




# which model (product or no product) was closer to the true second difference?
names(second.dif.which.closer)<-c("logit", "cloglog", "log", "probit", "regression", "loglog", "logc")

second.dif.which.closer.summary<-matrix(data=NA, ncol=7, nrow=length(analysis.indices))
names(second.dif.which.closer.summary)<-c("logit", "cloglog", "log", "probit", "regression", "loglog", "logc")

rownames(second.dif.which.closer.summary)<-analysis.titles
colnames(second.dif.which.closer.summary)<-c("logit", "cloglog", "log", "probit", "regression", "loglog", "logc")
for(i in 1:length(analysis.indices)){

    second.dif.which.closer.summary[i,]<-colMeans(second.dif.which.closer[analysis.indices[[i]],])
  
}

write.csv(file="pro_nopro_second_dif_comparison.csv", second.dif.which.closer)
write.csv(file="pro_nopro_grouped_second_dif_comparison.csv", second.dif.which.closer.summary)


# how often did each model converge?
write.csv(file="convergence.report.csv", convergence.report.mat)




stopCluster(cl)

save.image("2014-12-15_continuous-sim-results.RData")











###################################################################
#
# Part Three: analysis of model results on Monte Carlo data sets
# 
###################################################################



# second difference analysis
##############################

# load in the results from our simulations
rm(list=ls())
load("2014-12-15_continuous-sim-results.RData")
library(ggplot2)

# make a "pretty" model name vector
model.name.pretty<-c("2A: logit basic", "2B: logit product", "7A: cloglog basic", "7B: cloglog product", "4A: log basic", "4B: log product", "3A: probit basic", "3B: probit product", "1A: regression basic", "1B: regression product", "6A: loglog basic", "6B: loglog product", "5A: logc basic", "5B: logc product", "2C: logit X2", "7C: cloglog X2", "4C: log X2", "3C: probit X2", "1C: regression X2", "6C: loglog X2", "5C: logc X2", "2D: logit Z2", "7D: cloglog Z2", "4D: log Z2", "3D: probit Z2", "1D: regression Z2", "6D: loglog Z2", "5C: logc Z2", "2E: logit X2 & Z2", "7E: cloglog X2 & Z2", "4E: log X2 & Z2", "3E: probit X2 & Z2", "1E: regression X2 & Z2", "6E: loglog X2 & Z2", "5E: logc X2 & Z2", "2F: logit saturated", "7F: cloglog saturated", "4F: log saturated", "3F: probit saturated", "1F: regression saturated", "6F: loglog saturated", "5F: logc saturated", "roc", "hl", "12: BIC", "8: AIC", "roc simple", "hl simple", "13: BIC simple", "9: AIC simple", "probit roc", "15: probit BIC", "11: probit AIC", "probit hl", "17: probit significance", "logit roc", "14: logit BIC", "10: logit AIC", "logit hl", "16: logit significance")


# exclude ROC/HL models
target.models<-c(1:42,45:46,49:50,52:53,55,57:58,60)


# display the confidence interval performance 
ci.plot.data<-t(second.dif.ci.count[1:115,1,][,target.models])
ci.plot.vector<-round(100*apply(X=ci.plot.data, FUN=mean, MARGIN=1)/25, 1)
ci.plot.data.add<-t(second.dif.ci.count[analysis.indices[[1]],1,][,target.models])
ci.plot.vector.add<-round(100*apply(X=ci.plot.data.add, FUN=mean, MARGIN=1)/25, 1)
ci.plot.data.weak<-t(second.dif.ci.count[c(analysis.indices[[2]], analysis.indices[[3]], analysis.indices[[4]]),1,][,target.models])
ci.plot.vector.weak<-round(100*apply(X=ci.plot.data.weak, FUN=mean, MARGIN=1)/25, 1)
ci.plot.data.strong<-t(second.dif.ci.count[c(analysis.indices[[5]], analysis.indices[[6]]),1,][,target.models])
ci.plot.vector.strong<-round(100*apply(X=ci.plot.data.strong, FUN=mean, MARGIN=1)/25, 1)
o<-order(ci.plot.vector, decreasing=F)
df<-data.frame(ci.plot.vector, ci.plot.vector.add, ci.plot.vector.weak, ci.plot.vector.strong, group=factor(model.name.pretty[target.models], levels=model.name.pretty[target.models][o]))

# Figure 2: Min-Max Second Difference CI Performance of All Models

groups<-rep(c("additive", "weak interaction", "strong interaction"), each=length(target.models))
o<-order(ci.plot.vector, decreasing=F)
df<-data.frame(ci.plot = c(ci.plot.vector.add, ci.plot.vector.weak, ci.plot.vector.strong), model.name=factor(model.name.pretty[target.models], levels=model.name.pretty[target.models][o]), groups=factor(groups, levels=c("additive", "weak interaction", "strong interaction")))
p<-ggplot(df)
p+geom_point(aes(ci.plot, model.name, shape=groups), size=3)+ xlab("% of Data Sets with 95% Confidence Interval Containing True Value") + ylab("Estimation Model") + theme(axis.text = element_text(size=14, color="black"), axis.title = element_text(size=14), legend.text = element_text(size=14), legend.title = element_text(size=14))+geom_hline(yintercept=28.5)+scale_shape_discrete(name="DGP Category")
dev.copy2pdf(file="conf_interval_performance.pdf")

best.ind<-which(ci.plot.vector.add>=90 & ci.plot.vector.weak>=90 & ci.plot.vector.strong>=90)



# second difference abs bias
# Additive Models
# look at distribution of performance within DGP categories
# counting each DGP as an observation

# now the best CI models
additive.plot.data<-t(second.dif.abs.bias[analysis.indices[[1]],1,][,best.ind])
additive.plot.vector<-as.vector(additive.plot.data)

group.label<-factor(model.name.pretty[best.ind], levels=model.name.pretty[best.ind[o]])
additive.plot.vector.add<-additive.plot.vector
group.label.add<-group.label


# second difference abs bias
# Strongly Interactive Models
# look at distribution of performance within DGP categories
# counting each DGP as an observation
# now the best CI models
additive.plot.data<-t(second.dif.abs.bias[c(analysis.indices[[5]], analysis.indices[[6]]),1,][,best.ind])
additive.plot.vector<-as.vector(additive.plot.data)

group.label<-factor(model.name.pretty[best.ind], levels=model.name.pretty[best.ind[o]])
additive.plot.vector.strong<-additive.plot.vector
group.label.strong<-group.label


# second difference Abs bias
# Weakly Interactive Models
# look at distribution of performance within DGP categories
# counting each DGP as an observation

# now the best CI models
additive.plot.data<-t(second.dif.abs.bias[c(analysis.indices[[2]], analysis.indices[[3]], analysis.indices[[4]]),1,][,best.ind])
additive.plot.vector<-as.vector(additive.plot.data)

group.label<-factor(model.name.pretty[best.ind], levels=model.name.pretty[best.ind[o]])
additive.plot.vector.weak<-additive.plot.vector
group.label.weak<-group.label



# Figure 3: Min-Max Second Difference Absolute Error Performance of Models with 
# Accurate 90% Confidence Intervals

# create a comprehensive absolute bias plot for second differences
# start by ordering the elements
additive.plot.data<-t(second.dif.abs.bias[1:115,1,][,best.ind])
additive.plot.vector<-as.vector(additive.plot.data)
plot.vec<-apply(X=additive.plot.data, FUN=median, MARGIN=1)
o<-order(plot.vec)

additive.plot.vector<-c(additive.plot.vector.add, additive.plot.vector.weak, additive.plot.vector.strong)
group.label<-factor(c(rep(as.character(group.label.add), 49), rep(as.character(group.label.weak), 42), rep(as.character(group.label.strong), 24)), levels=model.name.pretty[best.ind[o]])
strength<-factor(c(rep("additive", length(additive.plot.vector.add)), rep("weak interaction", length(additive.plot.vector.weak)), rep("strong interaction", length(additive.plot.vector.strong))), levels=c("additive", "weak interaction", "strong interaction"))
df<-data.frame(additive.plot.vector, group=group.label, strength)
p<-ggplot(df)
p + geom_boxplot(aes(strength, additive.plot.vector)) +  coord_flip(ylim = c(0.04, 0.21)) +  ylab("Average Absolute Error in Estimate of Min-Max Second Difference") + xlab("") + ggtitle("") + theme(plot.title = element_text(size=16, face="bold")) + facet_grid(group~.) + theme(strip.text.y = element_text(angle=0, size=14) )+ theme(axis.text = element_text(size=10, color="black"), axis.title = element_text(size=14), legend.text = element_text(size=14), legend.title = element_text(size=14))

dev.copy2pdf(file="second-difference-bias.pdf")

str.un <- unique(df$strength)
group.un <- unique(df$group)
med <- c(); qt1 <- c(); qt2 <- c(); k=0
for(i in 1:length(str.un)){
  for(j in 1:length(group.un)){
    k = k+1
    df.s <- subset(df, subset = (strength == str.un[i] & group == group.un[j]) )
    med[k] <- median(df.s$additive.plot.vector)
    qt1[k] <- quantile(df.s$additive.plot.vector, probs=c(0.25))
    qt2[k] <- quantile(df.s$additive.plot.vector, probs=c(0.75))
  }
}
summary(med)
summary(c(qt1, qt2))


# Auxiliary plot (not used in paper)
# compare absolute value of bias in min-max second difference estimation for different models
# compare logit with product term to saturated logit model
colvec<-c(rep(1, length(continuous.additive)), rep(2, length(min.max.secdif.10)), rep(3, length(min.max.secdif.20)), rep(4, length(min.max.secdif.30)), rep(5, length(min.max.secdif.40)), rep(6, length(min.max.secdif.50)))

y1<-second.dif.abs.bias[1:115, 1, 2]
y2<-second.dif.abs.bias[1:115, 1, 36]
plot(y1~y2, main="Logit Product Only vs. All Polynomials", ylab="Abs sec. dif. error for saturated logit (Model 2F)", xlab="Abs sec. dif. error for logit w/ product term (Model 2B)", cex=(2-(1/6)*colvec))
abline(0,1,lty=2)
legend("topleft", legend=c("0", "0.1", "0.2", "0.3", "0.4", "0.5"), pch=rep(1,6), pt.cex=(5/6+(1/6)*1:6), title=expression(paste("   ", abs(Delta*Delta["min-max"]), "   ")))
dev.copy2pdf(file="logit-product-v-polynomials.pdf")


# Auxiliary plot (not used in paper)
# compare absolute value of bias in min-max second difference estimation for different models
# compare probit with product term to saturated logit model
y1<-second.dif.abs.bias[1:115, 1, 8]
y2<-second.dif.abs.bias[1:115, 1, 39]
plot(y1~y2, main="Probit Product Only vs. All Polynomials", ylab="Abs sec. dif. error for saturated probit (Model 3F)", xlab="Abs sec. dif. error for probit w/ product term (Model 3B)", cex=(2-(1/6)*colvec))
abline(0,1,lty=2)
legend("topleft", legend=c("0", "0.1", "0.2", "0.3", "0.4", "0.5"), pch=rep(1,6), pt.cex=(5/6+(1/6)*1:6), title=expression(paste("   ", abs(Delta*Delta["min-max"]), "   ")))
dev.copy2pdf(file="probit-product-v-polynomials.pdf")

# Auxiliary plot (not used in paper)
# compare absolute value of bias in min-max second difference estimation for different models
# compare saturated probit to saturated logit model
y1<-second.dif.abs.bias[1:155, 1, 36]
y2<-second.dif.abs.bias[1:155, 1, 39]
plot(y2~y1, xlab="logit, all squared terms", ylab="probit, all squared terms", cex=(2-(1/6)*colvec))
abline(0,1,lty=2)


# Auxiliary plot (not used in paper)
# compare signed value of bias in min-max second difference estimation for different models
# compare logit with product term to saturated probit model
y1<-second.dif.bias[1:115, 1, 8]
y2<-second.dif.bias[1:115, 1, 39]
plot(y1~y2, main="Probit Product Only vs. All Polynomials", ylab="Sec. dif. bias for saturated probit (Model 3F)", xlab="Sec. dif. bias for probit w/ product term (Model 3B)", cex=(2-(1/6)*colvec))
abline(0,1,lty=2)
legend("topleft", legend=c("0", "0.1", "0.2", "0.3", "0.4", "0.5"), pch=rep(1,6), pt.cex=(5/6+(1/6)*1:6), title=expression(paste("   ", abs(Delta*Delta["min-max"]), "   ")))
dev.copy2pdf(file="probit-product-v-polynomials-bias.pdf")








# second difference CI widths
# Additive Models
# look at distribution of performance within DGP categories
# counting each DGP as an observation

# now the best CI models
additive.plot.data<-t(second.dif.ci.width.mean[analysis.indices[[1]],1,][,best.ind])
additive.plot.vector<-as.vector(additive.plot.data)

group.label<-factor(model.name.pretty[best.ind], levels=model.name.pretty[best.ind[o]])
additive.plot.vector.add<-additive.plot.vector
group.label.add<-group.label


# second difference CI Widths
# Strongly Interactive Models
# look at distribution of performance within DGP categories
# counting each DGP as an observation
# now the best CI models
additive.plot.data<-t(second.dif.ci.width.mean[c(analysis.indices[[5]], analysis.indices[[6]]),1,][,best.ind])
additive.plot.vector<-as.vector(additive.plot.data)

group.label<-factor(model.name.pretty[best.ind], levels=model.name.pretty[best.ind[o]])
additive.plot.vector.strong<-additive.plot.vector
group.label.strong<-group.label


# second difference CI widths
# Weakly Interactive Models
# look at distribution of performance within DGP categories
# counting each DGP as an observation

# now the best CI models
additive.plot.data<-t(second.dif.ci.width.mean[c(analysis.indices[[2]], analysis.indices[[3]], analysis.indices[[4]]),1,][,best.ind])
additive.plot.vector<-as.vector(additive.plot.data)

group.label<-factor(model.name.pretty[best.ind], levels=model.name.pretty[best.ind[o]])
additive.plot.vector.weak<-additive.plot.vector
group.label.weak<-group.label



# Auxiliary plot (not used in paper) 
# Width of 95% confidence intervals for min-max second difference
# create a comprehensive absolute bias plot for second differences
# start by ordering the elements
additive.plot.data<-t(second.dif.ci.width.mean[1:115,1,][,best.ind])
additive.plot.vector<-as.vector(additive.plot.data)
plot.vec<-apply(X=additive.plot.data, FUN=median, MARGIN=1)
o<-order(plot.vec)

additive.plot.vector<-c(additive.plot.vector.add, additive.plot.vector.weak, additive.plot.vector.strong)
group.label<-factor(c(rep(as.character(group.label.add), 49), rep(as.character(group.label.weak), 42), rep(as.character(group.label.strong), 24)), levels=model.name.pretty[best.ind[o]])
strength<-factor(c(rep("additive", length(additive.plot.vector.add)), rep("weak interaction", length(additive.plot.vector.weak)), rep("strong interaction", length(additive.plot.vector.strong))), levels=c("additive", "weak interaction", "strong interaction"))
df<-data.frame(additive.plot.vector, group=group.label, strength)
p<-ggplot(df)
p + geom_boxplot(aes(strength, additive.plot.vector)) +  coord_flip(ylim = c(0.35, 0.8)) +  ylab("Average Width of Confidence Intervals for Min-Max Second Difference") + xlab("") + ggtitle("") + theme(plot.title = element_text(size=16, face="bold")) + facet_grid(group~.) + theme(strip.text.y = element_text(angle=0, size=14) )+ theme(axis.text = element_text(size=10, color="black"), axis.title = element_text(size=14), legend.text = element_text(size=14), legend.title = element_text(size=14))

# Figure S-10
dev.copy2pdf(file="second-difference-ci-width.pdf")




# create a comprehensive RMSE plot for second differences

# second difference RMSE levels
# Additive Models
# look at distribution of performance within DGP categories
# counting each DGP as an observation

# now the best CI models
additive.plot.data<-t(second.dif.rmse[analysis.indices[[1]],1,][,best.ind])
additive.plot.vector<-as.vector(additive.plot.data)
plot.vec<-apply(X=additive.plot.data, FUN=median, MARGIN=1)
o<-order(plot.vec)

group.label<-factor(model.name.pretty[best.ind], levels=model.name.pretty[best.ind[o]])
additive.plot.vector.add<-additive.plot.vector
group.label.add<-group.label



# second difference RMSE
# Strongly Interactive Models
# look at distribution of performance within DGP categories
# counting each DGP as an observation
# now the best CI models
additive.plot.data<-t(second.dif.rmse[c(analysis.indices[[5]], analysis.indices[[6]]),1,][,best.ind])
additive.plot.vector<-as.vector(additive.plot.data)
plot.vec<-apply(X=additive.plot.data, FUN=median, MARGIN=1)
o<-order(plot.vec)

group.label<-factor(model.name.pretty[best.ind], levels=model.name.pretty[best.ind[o]])
additive.plot.vector.strong<-additive.plot.vector
group.label.strong<-group.label



# second difference RMSE
# Weakly Interactive Models
# look at distribution of performance within DGP categories
# counting each DGP as an observation

# now the best CI models
additive.plot.data<-t(second.dif.rmse[c(analysis.indices[[2]], analysis.indices[[3]], analysis.indices[[4]]),1,][,best.ind])
additive.plot.vector<-as.vector(additive.plot.data)
plot.vec<-apply(X=additive.plot.data, FUN=median, MARGIN=1)
o<-order(plot.vec)

group.label<-factor(model.name.pretty[best.ind], levels=model.name.pretty[best.ind[o]])
additive.plot.vector.weak<-additive.plot.vector
group.label.weak<-group.label


# Auxiliary plot (not used in paper) 
# create a comprehensive RMSE plot for min-max second differences
# start by ordering the elements
additive.plot.data<-t(second.dif.rmse[1:115,1,][,best.ind])
additive.plot.vector<-as.vector(additive.plot.data)
plot.vec<-apply(X=additive.plot.data, FUN=median, MARGIN=1)
o<-order(plot.vec)

additive.plot.vector<-c(additive.plot.vector.add, additive.plot.vector.weak, additive.plot.vector.strong)
group.label<-factor(c(rep(as.character(group.label.add), 49), rep(as.character(group.label.weak), 42), rep(as.character(group.label.strong), 24)), levels=model.name.pretty[best.ind[o]])
strength<-factor(c(rep("additive", length(additive.plot.vector.add)), rep("weak interaction", length(additive.plot.vector.weak)), rep("strong interaction", length(additive.plot.vector.strong))), levels=c("additive", "weak interaction", "strong interaction"))
df<-data.frame(additive.plot.vector, group=group.label, strength)
p<-ggplot(df)
p + geom_boxplot(aes(strength, additive.plot.vector)) +  coord_flip(ylim = c(0.04, 0.31)) +  ylab("RMSE in Second Difference") + xlab("") + ggtitle("") + theme(plot.title = element_text(size=16, face="bold")) + facet_grid(group~.) + theme(strip.text.y = element_text(angle=0, size=14) )+ theme(axis.text = element_text(size=10, color="black"), axis.title = element_text(size=14), legend.text = element_text(size=14), legend.title = element_text(size=14))

dev.copy2pdf(file="second-difference-rmse.pdf")






# make a chart of the signed avg. second diff bias
# load in the results from our simulations
rm(list=ls())
load("2014-12-15_continuous-sim-results.RData")
library(ggplot2)


# make a "pretty" model name vector
model.name.pretty<-c("2A: logit basic", "2B: logit product", "7A: cloglog basic", "7B: cloglog product", "4A: log basic", "4B: log product", "3A: probit basic", "3B: probit product", "1A: regression basic", "1B: regression product", "6A: loglog basic", "6B: loglog product", "5A: logc basic", "5B: logc product", "2C: logit X2", "7C: cloglog X2", "4C: log X2", "3C: probit X2", "1C: regression X2", "6C: loglog X2", "5C: logc X2", "2D: logit Z2", "7D: cloglog Z2", "4D: log Z2", "3D: probit Z2", "1D: regression Z2", "6D: loglog Z2", "5C: logc Z2", "2E: logit X2 & Z2", "7E: cloglog X2 & Z2", "4E: log X2 & Z2", "3E: probit X2 & Z2", "1E: regression X2 & Z2", "6E: loglog X2 & Z2", "5E: logc X2 & Z2", "2F: logit saturated", "7F: cloglog saturated", "4F: log saturated", "3F: probit saturated", "1F: regression saturated", "6F: loglog saturated", "5F: logc saturated", "roc", "hl", "12: BIC", "8: AIC", "roc simple", "hl simple", "13: BIC simple", "9: AIC simple", "probit roc", "15: probit BIC", "11: probit AIC", "probit hl", "17: probit significance", "logit roc", "14: logit BIC", "10: logit AIC", "logit hl", "16: logit significance")


# exclude erroneous ROC/HL models
target.models<-c(1:42,45:46,49:50,52:53,55,57:58,60)

# compute the sign of the second difference
secdif.sign <- c()
for(i in 1:155){
  secdif.sign[i] <- sign(second.dif.true(i)[1])
  if(secdif.sign[i]==0){secdif.sign[i] <- 1}
}


# display the confidence interval performance 
ci.plot.data<-t(second.dif.ci.count[1:115,1,][,target.models])
ci.plot.vector<-round(100*apply(X=ci.plot.data, FUN=mean, MARGIN=1)/25, 1)
ci.plot.data.add<-t(second.dif.ci.count[analysis.indices[[1]],1,][,target.models])
ci.plot.vector.add<-round(100*apply(X=ci.plot.data.add, FUN=mean, MARGIN=1)/25, 1)
ci.plot.data.weak<-t(second.dif.ci.count[c(analysis.indices[[2]], analysis.indices[[3]], analysis.indices[[4]]),1,][,target.models])
ci.plot.vector.weak<-round(100*apply(X=ci.plot.data.weak, FUN=mean, MARGIN=1)/25, 1)
ci.plot.data.strong<-t(second.dif.ci.count[c(analysis.indices[[5]], analysis.indices[[6]]),1,][,target.models])
ci.plot.vector.strong<-round(100*apply(X=ci.plot.data.strong, FUN=mean, MARGIN=1)/25, 1)
o<-order(ci.plot.vector, decreasing=F)
df<-data.frame(ci.plot.vector, ci.plot.vector.add, ci.plot.vector.weak, ci.plot.vector.strong, group=factor(model.name.pretty[target.models], levels=model.name.pretty[target.models][o]))

groups<-rep(c("additive", "weak int.", "strong int."), each=length(target.models))
o<-order(ci.plot.vector, decreasing=F)
df<-data.frame(ci.plot = c(ci.plot.vector.add, ci.plot.vector.weak, ci.plot.vector.strong), model.name=factor(model.name.pretty[target.models], levels=model.name.pretty[target.models][o]), groups=factor(groups, levels=c("additive", "weak int.", "strong int.")))

best.ind<-which(ci.plot.vector.add>=90 & ci.plot.vector.weak>=90 & ci.plot.vector.strong>=90)


# second difference abs bias levels
# Additive Models
# look at distribution of performance within DGP categories
# counting each DGP as an observation

# now the best CI models
ci.plot.data<-t(second.dif.ci.count[analysis.indices[[1]],1,])
ci.plot.vector<-round(100*apply(X=ci.plot.data, FUN=mean, MARGIN=1)/25, 1)
#best.ind<-which((ci.plot.vector)>92.5 & ci.plot.vector<97.5)
#best.ind<-best.ind[which(best.ind %in% target.models)] # excludes the bad roc/hl models
ci.plot.vector<-ci.plot.vector[best.ind]
ci.label.vector<-paste("(", ci.plot.vector, "%)", sep="")
additive.plot.data<-t(second.dif.bias[analysis.indices[[1]],1,][,best.ind])
secdif.sign.mat <- matrix(data=secdif.sign[analysis.indices[[1]]], ncol=length(analysis.indices[[1]]), nrow=dim(additive.plot.data)[1], byrow=T)
additive.plot.data <- additive.plot.data*secdif.sign.mat
additive.plot.vector<-as.vector(additive.plot.data)
plot.vec<-apply(X=additive.plot.data, FUN=median, MARGIN=1)
o<-order(plot.vec)

group.label<-factor(model.name.pretty[best.ind], levels=model.name.pretty[best.ind[o]])
colorlabel<-ifelse(best.ind[o]>42, "darkred", "black")

additive.plot.vector.add<-additive.plot.vector
group.label.add<-group.label



# second difference abs bias
# Strongly Interactive Models
# look at distribution of performance within DGP categories
# counting each DGP as an observation
# now the best CI models
ci.plot.data<-t(second.dif.ci.count[c(analysis.indices[[5]], analysis.indices[[6]]),1,])
ci.plot.vector<-round(100*apply(X=ci.plot.data, FUN=mean, MARGIN=1)/25, 1)
#best.ind<-which((ci.plot.vector)>92.5 & ci.plot.vector<97.5)
#best.ind<-best.ind[which(best.ind %in% target.models)] # excludes the bad roc/hl models
ci.plot.vector<-ci.plot.vector[best.ind]
ci.label.vector<-paste("(", ci.plot.vector, "%)", sep="")
o<-order(ci.plot.vector, decreasing=F)
additive.plot.data<-t(second.dif.bias[c(analysis.indices[[5]], analysis.indices[[6]]),1,][,best.ind])
secdif.sign.mat <- matrix(data=secdif.sign[c(analysis.indices[[5]], analysis.indices[[6]])], ncol=length(c(analysis.indices[[5]], analysis.indices[[6]])), nrow=dim(additive.plot.data)[1], byrow=T)
additive.plot.data <- additive.plot.data*secdif.sign.mat
additive.plot.vector<-as.vector(additive.plot.data)
plot.vec<-apply(X=additive.plot.data, FUN=median, MARGIN=1)
o<-order(plot.vec)


group.label<-factor(model.name.pretty[best.ind], levels=model.name.pretty[best.ind[o]])
colorlabel<-ifelse(best.ind[o]>42, "darkred", "black")

additive.plot.vector.strong<-additive.plot.vector
group.label.strong<-group.label



# second difference Abs bias
# Weakly Interactive Models
# look at distribution of performance within DGP categories
# counting each DGP as an observation

# now the best CI models
ci.plot.data<-t(second.dif.ci.count[c(analysis.indices[[2]], analysis.indices[[3]], analysis.indices[[4]]),1,])
ci.plot.vector<-round(100*apply(X=ci.plot.data, FUN=mean, MARGIN=1)/25, 1)
#best.ind<-which((ci.plot.vector)>92.5 & ci.plot.vector<97.5)
#best.ind<-best.ind[which(best.ind %in% target.models)] # excludes the bad roc/hl models
ci.plot.vector<-ci.plot.vector[best.ind]
ci.label.vector<-paste("(", ci.plot.vector, "%)", sep="")
o<-order(ci.plot.vector, decreasing=F)
additive.plot.data<-t(second.dif.bias[c(analysis.indices[[2]], analysis.indices[[3]], analysis.indices[[4]]),1,][,best.ind])
secdif.sign.mat <- matrix(data=secdif.sign[c(analysis.indices[[2]], analysis.indices[[3]], analysis.indices[[4]])], ncol=length(c(analysis.indices[[2]], analysis.indices[[3]], analysis.indices[[4]])), nrow=dim(additive.plot.data)[1], byrow=T)
additive.plot.data <- additive.plot.data*secdif.sign.mat
additive.plot.vector<-as.vector(additive.plot.data)
plot.vec<-apply(X=additive.plot.data, FUN=median, MARGIN=1)
o<-order(plot.vec)

group.label<-factor(model.name.pretty[best.ind], levels=model.name.pretty[best.ind[o]])
colorlabel<-ifelse(best.ind[o]>42, "darkred", "black")

additive.plot.vector.weak<-additive.plot.vector
group.label.weak<-group.label


# create a comprehensive absolute bias plot for second differences
# start by ordering the elements
additive.plot.data<-t(second.dif.bias[1:115,1,][,best.ind])
additive.plot.vector<-as.vector(additive.plot.data)
plot.vec<-apply(X=additive.plot.data, FUN=median, MARGIN=1)
o<-order(plot.vec)

additive.plot.vector<-c(additive.plot.vector.add, additive.plot.vector.weak, additive.plot.vector.strong)
group.label<-factor(c(rep(as.character(group.label.add), 49), rep(as.character(group.label.weak), 42), rep(as.character(group.label.strong), 24)), levels=model.name.pretty[best.ind[o]])
strength<-factor(c(rep("additive", length(additive.plot.vector.add)), rep("weak interaction", length(additive.plot.vector.weak)), rep("strong interaction", length(additive.plot.vector.strong))), levels=c("additive", "weak interaction", "strong interaction"))
df<-data.frame(additive.plot.vector, group=group.label, strength)
p<-ggplot(df)
p + geom_boxplot(aes(strength, additive.plot.vector)) +  coord_flip(ylim = c(-0.21, 0.21)) +  ylab("Average Error in Estimate of Min-Max Second Difference") + xlab("") + ggtitle("") + theme(plot.title = element_text(size=16, face="bold")) + facet_grid(group~.) + theme(strip.text.y = element_text(angle=0, size=14) )+ theme(axis.text = element_text(size=10, color="black"), axis.title = element_text(size=14), legend.text = element_text(size=14), legend.title = element_text(size=14))

# Figure S-8
dev.copy2pdf(file="second-difference-strength.pdf")













# marginal effect difference analysis
########################################



# load in the results from our simulations
rm(list=ls())
load("2014-12-15_continuous-sim-results.RData")
library(ggplot2)

# make a "pretty" model name vector
model.name.pretty<-c("2A: logit basic", "2B: logit product", "7A: cloglog basic", "7B: cloglog product", "4A: log basic", "4B: log product", "3A: probit basic", "3B: probit product", "1A: regression basic", "1B: regression product", "6A: loglog basic", "6B: loglog product", "5A: logc basic", "5B: logc product", "2C: logit X2", "7C: cloglog X2", "4C: log X2", "3C: probit X2", "1C: regression X2", "6C: loglog X2", "5C: logc X2", "2D: logit Z2", "7D: cloglog Z2", "4D: log Z2", "3D: probit Z2", "1D: regression Z2", "6D: loglog Z2", "5C: logc Z2", "2E: logit X2 & Z2", "7E: cloglog X2 & Z2", "4E: log X2 & Z2", "3E: probit X2 & Z2", "1E: regression X2 & Z2", "6E: loglog X2 & Z2", "5E: logc X2 & Z2", "2F: logit saturated", "7F: cloglog saturated", "4F: log saturated", "3F: probit saturated", "1F: regression saturated", "6F: loglog saturated", "5F: logc saturated", "roc", "hl", "12: BIC", "8: AIC", "roc simple", "hl simple", "13: BIC simple", "9: AIC simple", "probit roc", "15: probit BIC", "11: probit AIC", "probit hl", "17: probit significance", "logit roc", "14: logit BIC", "10: logit AIC", "logit hl", "16: logit significance")

# exclude ROC/HL models
target.models<-c(1:42,45:46,49:50,52:53,55,57:58,60)


# Marginal Effect difference abs bias levels
# display the confidence interval performance 
ci.plot.data<-me.dif.ci.count[1:115,c(1,3),target.models]
ci.plot.data<-aperm(ci.plot.data, c(3,1,2))
dim(ci.plot.data)<-c(52,115*2)
ci.plot.vector<-round(100*apply(X=ci.plot.data, FUN=mean, MARGIN=1)/25, 1)

ci.plot.data.strong<-me.dif.ci.count[c(analysis.indices[[5]], analysis.indices[[6]]),c(1,3),target.models]
ci.plot.data.strong<-aperm(ci.plot.data.strong, c(3,1,2))
dim(ci.plot.data.strong)<-c(52,24*2)
ci.plot.vector.strong<-round(100*apply(X=ci.plot.data.strong, FUN=mean, MARGIN=1)/25, 1)

ci.plot.data.weak<-me.dif.ci.count[c(analysis.indices[[2]], analysis.indices[[3]], analysis.indices[[4]]),c(1,3),target.models]
ci.plot.data.weak<-aperm(ci.plot.data.weak, c(3,1,2))
dim(ci.plot.data.weak)<-c(52,42*2)
ci.plot.vector.weak<-round(100*apply(X=ci.plot.data.weak, FUN=mean, MARGIN=1)/25, 1)

ci.plot.data.add<-me.dif.ci.count[c(analysis.indices[[1]]),c(1,3),target.models]
ci.plot.data.add<-aperm(ci.plot.data.add, c(3,1,2))
dim(ci.plot.data.add)<-c(52,49*2)
ci.plot.vector.add<-round(100*apply(X=ci.plot.data.add, FUN=mean, MARGIN=1)/25, 1)


# Auxiliary plot (not used in paper) 
# Min-Max Marginal Effect Difference CI Performance of All Models

groups<-rep(c("additive", "weak interaction", "strong interaction"), each=length(target.models))
o<-order(ci.plot.vector, decreasing=F)
df<-data.frame(ci.plot = c(ci.plot.vector.add, ci.plot.vector.weak, ci.plot.vector.strong), model.name=factor(model.name.pretty[target.models], levels=model.name.pretty[target.models][o]), groups=factor(groups, levels=c("additive", "weak interaction", "strong interaction")))
p<-ggplot(df)
p+geom_point(aes(ci.plot, model.name, shape=groups), size=3)+ xlab("% of Data Sets with 95% Confidence Interval Containing True Value") + ylab("Estimation Model")+ theme(strip.text.y = element_text(angle=0, size=14) )+ theme(axis.text = element_text(size=14, color="black"), axis.title = element_text(size=14), legend.text = element_text(size=14), legend.title = element_text(size=14)) +geom_hline(yintercept=28.5)+scale_shape_discrete(name="DGP Category")
dev.copy2pdf(file="me_conf_interval_performance.pdf")

best.ind<-which(ci.plot.vector.add>=90 & ci.plot.vector.weak>=90 & ci.plot.vector.strong>=90)

model.name.pretty[best.ind]


# Strongly Interactive Models
additive.plot.data<-t(me.dif.abs.bias[analysis.indices[[5]],1,][,best.ind])
additive.plot.data<-cbind(additive.plot.data, t(me.dif.abs.bias[analysis.indices[[5]],3,][,best.ind]))
additive.plot.data<-cbind(additive.plot.data, t(me.dif.abs.bias[analysis.indices[[6]],1,][,best.ind]))
additive.plot.data<-cbind(additive.plot.data, t(me.dif.abs.bias[analysis.indices[[6]],3,][,best.ind]))
additive.plot.vector<-as.vector(additive.plot.data)
plot.vec<-apply(X=additive.plot.data, FUN=median, MARGIN=1)
o<-order(plot.vec)

group.label<-factor(model.name.pretty[best.ind], levels=model.name.pretty[best.ind[o]])
additive.plot.vector.strong<-additive.plot.vector
group.label.strong<-group.label





# me difference abs bias levels
# Moderately Interactive Models
# look at distribution of performance within DGP categories
# counting each DGP as an observation
# the non-adaptive models

additive.plot.data<-t(me.dif.abs.bias[analysis.indices[[2]],1,][,best.ind])
additive.plot.data<-cbind(additive.plot.data, t(me.dif.abs.bias[analysis.indices[[2]],3,][,best.ind]))
additive.plot.data<-cbind(additive.plot.data, t(me.dif.abs.bias[analysis.indices[[3]],1,][,best.ind]))
additive.plot.data<-cbind(additive.plot.data, t(me.dif.abs.bias[analysis.indices[[3]],3,][,best.ind]))
additive.plot.data<-cbind(additive.plot.data, t(me.dif.abs.bias[analysis.indices[[4]],1,][,best.ind]))
additive.plot.data<-cbind(additive.plot.data, t(me.dif.abs.bias[analysis.indices[[4]],3,][,best.ind]))
additive.plot.vector<-as.vector(additive.plot.data)
plot.vec<-apply(X=additive.plot.data, FUN=median, MARGIN=1)
o<-order(plot.vec)

group.label<-factor(model.name.pretty[best.ind], levels=model.name.pretty[best.ind[o]])
additive.plot.vector.weak<-additive.plot.vector
group.label.weak<-group.label


# me difference abs bias levels
# Additive Models
# look at distribution of performance within DGP categories
# counting each DGP as an observation

additive.plot.data<-t(me.dif.abs.bias[analysis.indices[[1]],1,][,best.ind])
additive.plot.data<-cbind(additive.plot.data, t(me.dif.abs.bias[analysis.indices[[1]],3,][,best.ind]))
additive.plot.vector<-as.vector(additive.plot.data)
plot.vec<-apply(X=additive.plot.data, FUN=median, MARGIN=1)
o<-order(plot.vec)

group.label<-factor(model.name.pretty[best.ind], levels=model.name.pretty[best.ind[o]])
additive.plot.vector.add<-additive.plot.vector
group.label.add<-group.label


# Figure 4: Min-Max Marginal Effect Difference Absolute Error Performance of Models
# with Accurate 95% Confidence Intervals

# comprehensive plot
# first, order the elements
additive.plot.data<-t(me.dif.abs.bias[1:115,1,][,best.ind])
additive.plot.data<-cbind(additive.plot.data, t(me.dif.abs.bias[,3,][,best.ind]))
additive.plot.vector<-as.vector(additive.plot.data)
plot.vec<-apply(X=additive.plot.data, FUN=median, MARGIN=1)
o<-order(plot.vec)

additive.plot.vector<-c(additive.plot.vector.add, additive.plot.vector.weak, additive.plot.vector.strong)
group.label<-factor(c(rep(as.character(group.label.add), 49), rep(as.character(group.label.weak), 42), rep(as.character(group.label.strong), 24)), levels=model.name.pretty[best.ind[o]])
strength<-factor(c(rep("additive", length(additive.plot.vector.add)), rep("weak interaction", length(additive.plot.vector.weak)), rep("strong interaction", length(additive.plot.vector.strong))), levels=c("additive", "weak interaction", "strong interaction"))
df<-data.frame(additive.plot.vector, group=group.label, strength)
p<-ggplot(df)
p + geom_boxplot(aes(strength, additive.plot.vector)) +  coord_flip(ylim = c(-0.02, 0.3)) +  ylab("Average Absolute Error in Estimate of Min-Max Marginal Effect Difference") + xlab("") + ggtitle("") + theme(plot.title = element_text(size=16, face="bold")) + facet_grid(group~.) + theme(strip.text.y = element_text(angle=0, size=14) )+ theme(axis.text = element_text(size=10, color="black"), axis.title = element_text(size=14), legend.text = element_text(size=14), legend.title = element_text(size=14))
dev.copy2pdf(file="me-diff-bias.pdf")


str.un <- unique(df$strength)
group.un <- unique(df$group)
med <- c(); qt1 <- c(); qt2 <- c(); k=0
for(i in 1:length(str.un)){
  for(j in 1:length(group.un)){
    k = k+1
    df.s <- subset(df, subset = (strength == str.un[i] & group == group.un[j]) )
    med[k] <- median(df.s$additive.plot.vector)
    qt1[k] <- quantile(df.s$additive.plot.vector, probs=c(0.25))
    qt2[k] <- quantile(df.s$additive.plot.vector, probs=c(0.75))
  }
}
summary(med)
summary(c(qt1, qt2))




colvec<-c(rep(1, length(continuous.additive)), rep(2, length(min.max.secdif.10)), rep(3, length(min.max.secdif.20)), rep(4, length(min.max.secdif.30)), rep(5, length(min.max.secdif.40)), rep(6, length(min.max.secdif.50)))


# Auxiliary plot (not used in paper)
# compare absolute value of bias in min-max marginal effect estimation for different models
# compare logit with product term to saturated logit model
y1<-c(me.dif.abs.bias[1:115, 1, 2], me.dif.abs.bias[1:115, 3, 2])
y2<-c(me.dif.abs.bias[1:115, 1, 36], me.dif.abs.bias[1:115, 3, 36])
plot(y1~y2, main="Logit Product Only vs. All Polynomials", ylab="Abs ME dif. error for saturated logit (Model 2F)", xlab="Abs ME dif. error for logit w/ product term (Model 2B)", cex=(2-(1/6)*colvec))
abline(0,1,lty=2)
legend("topleft", legend=c("0", "0.1", "0.2", "0.3", "0.4", "0.5"), pch=rep(1,6), pt.cex=(5/6+(1/6)*1:6), title=expression(paste("   ", abs(Delta*Delta["min-max"]), "   ")))
dev.copy2pdf(file="logit-product-v-polynomials-ME.pdf")


# Auxiliary plot (not used in paper)
# compare absolute value of bias in min-max marginal effect estimation for different models
# compare probit with product term to saturated probit model
y1<-c(me.dif.abs.bias[1:115, 1, 8], me.dif.abs.bias[1:115, 3, 8])
y2<-c(me.dif.abs.bias[1:115, 1, 39], me.dif.abs.bias[1:115, 3, 39])
plot(y1~y2, main="Probit Product Only vs. All Polynomials", ylab="Abs ME dif. error for saturated probit (Model 3F)", xlab="Abs ME dif. error for probit w/ product term (Model 3B)", cex=(2-(1/6)*colvec))
abline(0,1,lty=2)
legend("topleft", legend=c("0", "0.1", "0.2", "0.3", "0.4", "0.5"), pch=rep(1,6), pt.cex=(5/6+(1/6)*1:6), title=expression(paste("   ", abs(Delta*Delta["min-max"]), "   ")))
dev.copy2pdf(file="probit-product-v-polynomials-ME.pdf")


# Auxiliary plot (not used in paper)
# compare absolute value of bias in min-max marginal effect estimation for different models
# compare saturated probit to saturated logit model
y1<-second.dif.abs.bias[1:155, 1, 36]
y2<-second.dif.abs.bias[1:155, 1, 39]
plot(y2~y1, xlab="logit, all squared terms", ylab="probit, all squared terms", cex=(2-(1/6)*colvec))
abline(0,1,lty=2)


# Auxiliary plot (not used in paper)
# compare signed value of bias in min-max marginal effect estimation for different models
# compare probit with product term to saturated probit model
y1<-c(me.dif.bias[1:115, 1, 8], me.dif.bias[1:115, 3, 8])
y2<-c(me.dif.bias[1:115, 1, 39], me.dif.bias[1:115, 3, 39])
plot(y1~y2, main="Probit Product Only vs. All Polynomials", ylab="ME dif. bias for saturated probit (Model 3F)", xlab="ME dif. bias for probit w/ product term (Model 3B)", cex=(2-(1/6)*colvec))
abline(0,1,lty=2)
legend("topleft", legend=c("0", "0.1", "0.2", "0.3", "0.4", "0.5"), pch=rep(1,6), pt.cex=(5/6+(1/6)*1:6), title=expression(paste("   ", abs(Delta*Delta["min-max"]), "   ")))
dev.copy2pdf(file="probit-product-v-polynomials-ME-bias.pdf")





# marginal effects CI widths information
# Strongly Interactive Models
additive.plot.data<-t(me.dif.ci.width.mean[analysis.indices[[5]],1,][,best.ind])
additive.plot.data<-cbind(additive.plot.data, t(me.dif.ci.width.mean[analysis.indices[[5]],3,][,best.ind]))
additive.plot.data<-cbind(additive.plot.data, t(me.dif.ci.width.mean[analysis.indices[[6]],1,][,best.ind]))
additive.plot.data<-cbind(additive.plot.data, t(me.dif.ci.width.mean[analysis.indices[[6]],3,][,best.ind]))
additive.plot.vector<-as.vector(additive.plot.data)
plot.vec<-apply(X=additive.plot.data, FUN=median, MARGIN=1)
o<-order(plot.vec)

group.label<-factor(model.name.pretty[best.ind], levels=model.name.pretty[best.ind[o]])
additive.plot.vector.strong<-additive.plot.vector
group.label.strong<-group.label





# me difference abs bias levels
# Moderately Interactive Models
# look at distribution of performance within DGP categories
# counting each DGP as an observation
# the non-adaptive models

additive.plot.data<-t(me.dif.ci.width.mean[analysis.indices[[2]],1,][,best.ind])
additive.plot.data<-cbind(additive.plot.data, t(me.dif.ci.width.mean[analysis.indices[[2]],3,][,best.ind]))
additive.plot.data<-cbind(additive.plot.data, t(me.dif.ci.width.mean[analysis.indices[[3]],1,][,best.ind]))
additive.plot.data<-cbind(additive.plot.data, t(me.dif.ci.width.mean[analysis.indices[[3]],3,][,best.ind]))
additive.plot.data<-cbind(additive.plot.data, t(me.dif.ci.width.mean[analysis.indices[[4]],1,][,best.ind]))
additive.plot.data<-cbind(additive.plot.data, t(me.dif.ci.width.mean[analysis.indices[[4]],3,][,best.ind]))
additive.plot.vector<-as.vector(additive.plot.data)
plot.vec<-apply(X=additive.plot.data, FUN=median, MARGIN=1)
o<-order(plot.vec)

group.label<-factor(model.name.pretty[best.ind], levels=model.name.pretty[best.ind[o]])
additive.plot.vector.weak<-additive.plot.vector
group.label.weak<-group.label


# me difference abs bias levels
# Additive Models
# look at distribution of performance within DGP categories
# counting each DGP as an observation

additive.plot.data<-t(me.dif.ci.width.mean[analysis.indices[[1]],1,][,best.ind])
additive.plot.data<-cbind(additive.plot.data, t(me.dif.ci.width.mean[analysis.indices[[1]],3,][,best.ind]))
additive.plot.vector<-as.vector(additive.plot.data)
plot.vec<-apply(X=additive.plot.data, FUN=median, MARGIN=1)
o<-order(plot.vec)

group.label<-factor(model.name.pretty[best.ind], levels=model.name.pretty[best.ind[o]])
additive.plot.vector.add<-additive.plot.vector
group.label.add<-group.label



# Auxiliary plot (not used in paper)
# Width of 95% confidence intervals for min-max marginal effects difference
# first, order the elements
additive.plot.data<-t(me.dif.ci.width.mean[1:115,1,][,best.ind])
additive.plot.data<-cbind(additive.plot.data, t(me.dif.ci.width.mean[,3,][,best.ind]))
additive.plot.vector<-as.vector(additive.plot.data)
plot.vec<-apply(X=additive.plot.data, FUN=median, MARGIN=1)
o<-order(plot.vec)

additive.plot.vector<-c(additive.plot.vector.add, additive.plot.vector.weak, additive.plot.vector.strong)
group.label<-factor(c(rep(as.character(group.label.add), 49), rep(as.character(group.label.weak), 42), rep(as.character(group.label.strong), 24)), levels=model.name.pretty[best.ind[o]])
strength<-factor(c(rep("additive", length(additive.plot.vector.add)), rep("weak interaction", length(additive.plot.vector.weak)), rep("strong interaction", length(additive.plot.vector.strong))), levels=c("additive", "weak interaction", "strong interaction"))
df<-data.frame(additive.plot.vector, group=group.label, strength)
p<-ggplot(df)
p + geom_boxplot(aes(strength, additive.plot.vector)) +  coord_flip(ylim = c(-0.1, 1)) +  ylab("Average Width of Confidence Intervals for Min-Max Marginal Effect Difference") + xlab("") + ggtitle("") + theme(plot.title = element_text(size=16, face="bold")) + facet_grid(group~.) + theme(strip.text.y = element_text(angle=0, size=14) )+ theme(axis.text = element_text(size=10, color="black"), axis.title = element_text(size=14), legend.text = element_text(size=14), legend.title = element_text(size=14))

# Figure S-11
dev.copy2pdf(file="me-diff-ci-width.pdf")







# marginal effects CI widths information
# Strongly Interactive Models
additive.plot.data<-t(me.dif.rmse[analysis.indices[[5]],1,][,best.ind])
additive.plot.data<-cbind(additive.plot.data, t(me.dif.rmse[analysis.indices[[5]],3,][,best.ind]))
additive.plot.data<-cbind(additive.plot.data, t(me.dif.rmse[analysis.indices[[6]],1,][,best.ind]))
additive.plot.data<-cbind(additive.plot.data, t(me.dif.rmse[analysis.indices[[6]],3,][,best.ind]))
additive.plot.vector<-as.vector(additive.plot.data)
plot.vec<-apply(X=additive.plot.data, FUN=median, MARGIN=1)
o<-order(plot.vec)

group.label<-factor(model.name.pretty[best.ind], levels=model.name.pretty[best.ind[o]])
additive.plot.vector.strong<-additive.plot.vector
group.label.strong<-group.label





# me difference abs bias levels
# Moderately Interactive Models
# look at distribution of performance within DGP categories
# counting each DGP as an observation
# the non-adaptive models

additive.plot.data<-t(me.dif.rmse[analysis.indices[[2]],1,][,best.ind])
additive.plot.data<-cbind(additive.plot.data, t(me.dif.rmse[analysis.indices[[2]],3,][,best.ind]))
additive.plot.data<-cbind(additive.plot.data, t(me.dif.rmse[analysis.indices[[3]],1,][,best.ind]))
additive.plot.data<-cbind(additive.plot.data, t(me.dif.rmse[analysis.indices[[3]],3,][,best.ind]))
additive.plot.data<-cbind(additive.plot.data, t(me.dif.rmse[analysis.indices[[4]],1,][,best.ind]))
additive.plot.data<-cbind(additive.plot.data, t(me.dif.rmse[analysis.indices[[4]],3,][,best.ind]))
additive.plot.vector<-as.vector(additive.plot.data)
plot.vec<-apply(X=additive.plot.data, FUN=median, MARGIN=1)
o<-order(plot.vec)

group.label<-factor(model.name.pretty[best.ind], levels=model.name.pretty[best.ind[o]])
additive.plot.vector.weak<-additive.plot.vector
group.label.weak<-group.label


# me difference abs bias levels
# Additive Models
# look at distribution of performance within DGP categories
# counting each DGP as an observation

additive.plot.data<-t(me.dif.rmse[analysis.indices[[1]],1,][,best.ind])
additive.plot.data<-cbind(additive.plot.data, t(me.dif.rmse[analysis.indices[[1]],3,][,best.ind]))
additive.plot.vector<-as.vector(additive.plot.data)
plot.vec<-apply(X=additive.plot.data, FUN=median, MARGIN=1)
o<-order(plot.vec)

group.label<-factor(model.name.pretty[best.ind], levels=model.name.pretty[best.ind[o]])
additive.plot.vector.add<-additive.plot.vector
group.label.add<-group.label



# Auxiliary plot (not used in paper)
# RMSE for min-max marginal effects difference
# first, order the elements
additive.plot.data<-t(me.dif.rmse[1:115,1,][,best.ind])
additive.plot.data<-cbind(additive.plot.data, t(me.dif.rmse[,3,][,best.ind]))
additive.plot.vector<-as.vector(additive.plot.data)
plot.vec<-apply(X=additive.plot.data, FUN=median, MARGIN=1)
o<-order(plot.vec)

additive.plot.vector<-c(additive.plot.vector.add, additive.plot.vector.weak, additive.plot.vector.strong)
group.label<-factor(c(rep(as.character(group.label.add), 49), rep(as.character(group.label.weak), 42), rep(as.character(group.label.strong), 24)), levels=model.name.pretty[best.ind[o]])
strength<-factor(c(rep("additive", length(additive.plot.vector.add)), rep("weak interaction", length(additive.plot.vector.weak)), rep("strong interaction", length(additive.plot.vector.strong))), levels=c("additive", "weak interaction", "strong interaction"))
df<-data.frame(additive.plot.vector, group=group.label, strength)
p<-ggplot(df)
p + geom_boxplot(aes(strength, additive.plot.vector)) +  coord_flip(ylim = c(-0.02, 0.3)) +  ylab("RMSE of Second Difference Estimate (by DGP)") + xlab("") + ggtitle("") + theme(plot.title = element_text(size=16, face="bold")) + facet_grid(group~.) + theme(strip.text.y = element_text(angle=0, size=14) )+ theme(axis.text = element_text(size=10, color="black"), axis.title = element_text(size=14), legend.text = element_text(size=14), legend.title = element_text(size=14))
dev.copy2pdf(file="me-diff-rmse.pdf")









# This entire section includes auxiliary plots
# replicating the absolute min-max marginal effects differences
# with signed min-max ME differences

# load in the results from our simulations
rm(list=ls())
load("2014-12-15_continuous-sim-results.RData")
library(ggplot2)

# make a "pretty" model name vector
model.name.pretty<-c("2A: logit basic", "2B: logit product", "7A: cloglog basic", "7B: cloglog product", "4A: log basic", "4B: log product", "3A: probit basic", "3B: probit product", "1A: regression basic", "1B: regression product", "6A: loglog basic", "6B: loglog product", "5A: logc basic", "5B: logc product", "2C: logit X2", "7C: cloglog X2", "4C: log X2", "3C: probit X2", "1C: regression X2", "6C: loglog X2", "5C: logc X2", "2D: logit Z2", "7D: cloglog Z2", "4D: log Z2", "3D: probit Z2", "1D: regression Z2", "6D: loglog Z2", "5C: logc Z2", "2E: logit X2 & Z2", "7E: cloglog X2 & Z2", "4E: log X2 & Z2", "3E: probit X2 & Z2", "1E: regression X2 & Z2", "6E: loglog X2 & Z2", "5E: logc X2 & Z2", "2F: logit saturated", "7F: cloglog saturated", "4F: log saturated", "3F: probit saturated", "1F: regression saturated", "6F: loglog saturated", "5F: logc saturated", "roc", "hl", "12: BIC", "8: AIC", "roc simple", "hl simple", "13: BIC simple", "9: AIC simple", "probit roc", "15: probit BIC", "11: probit AIC", "probit hl", "17: probit significance", "logit roc", "14: logit BIC", "10: logit AIC", "logit hl", "16: logit significance")



# exclude ROC/HL models
target.models<-c(1:42,45:46,49:50,52:53,55,57:58,60)


# compute the sign of the second difference
medif.sign.1 <- c()
for(i in 1:155){
  medif.sign.1[i] <- sign(me.dif.true(i)[1])
  if(medif.sign.1[i]==0){medif.sign.1[i] <- 1}
}
medif.sign.3 <- c()
for(i in 1:155){
  medif.sign.3[i] <- sign(me.dif.true(i)[3])
  if(medif.sign.3[i]==0){medif.sign.3[i] <- 1}
}

# Marginal Effect bias levels
# display the confidence interval performance 
ci.plot.data<-me.dif.ci.count[1:115,c(1,3),target.models]
ci.plot.data<-aperm(ci.plot.data, c(3,1,2))
dim(ci.plot.data)<-c(52,115*2)
ci.plot.vector<-round(100*apply(X=ci.plot.data, FUN=mean, MARGIN=1)/25, 1)

ci.plot.data.strong<-me.dif.ci.count[c(analysis.indices[[5]], analysis.indices[[6]]),c(1,3),target.models]
ci.plot.data.strong<-aperm(ci.plot.data.strong, c(3,1,2))
dim(ci.plot.data.strong)<-c(52,24*2)
ci.plot.vector.strong<-round(100*apply(X=ci.plot.data.strong, FUN=mean, MARGIN=1)/25, 1)

ci.plot.data.weak<-me.dif.ci.count[c(analysis.indices[[2]], analysis.indices[[3]], analysis.indices[[4]]),c(1,3),target.models]
ci.plot.data.weak<-aperm(ci.plot.data.weak, c(3,1,2))
dim(ci.plot.data.weak)<-c(52,42*2)
ci.plot.vector.weak<-round(100*apply(X=ci.plot.data.weak, FUN=mean, MARGIN=1)/25, 1)

ci.plot.data.add<-me.dif.ci.count[c(analysis.indices[[1]]),c(1,3),target.models]
ci.plot.data.add<-aperm(ci.plot.data.add, c(3,1,2))
dim(ci.plot.data.add)<-c(52,49*2)
ci.plot.vector.add<-round(100*apply(X=ci.plot.data.add, FUN=mean, MARGIN=1)/25, 1)


# Auxiliary plot (not used in paper) 
# 95% CI performance for min-max signed ME difference
groups<-rep(c("additive", "weak int.", "strong int."), each=length(target.models))
o<-order(ci.plot.vector, decreasing=F)
df<-data.frame(ci.plot = c(ci.plot.vector.add, ci.plot.vector.weak, ci.plot.vector.strong), model.name=factor(model.name.pretty[target.models], levels=model.name.pretty[target.models][o]), groups=factor(groups, levels=c("additive", "weak int.", "strong int.")))



best.ind<-which(ci.plot.vector.add>=90 & ci.plot.vector.weak>=90 & ci.plot.vector.strong>=90)


medif.sign.mat.1.5 <- matrix(data=medif.sign.1[analysis.indices[[5]]], ncol=length(analysis.indices[[5]]), nrow=length(best.ind), byrow=T)
medif.sign.mat.1.6 <- matrix(data=medif.sign.1[analysis.indices[[6]]], ncol=length(analysis.indices[[6]]), nrow=length(best.ind), byrow=T)
medif.sign.mat.3.5 <- matrix(data=medif.sign.3[analysis.indices[[5]]], ncol=length(analysis.indices[[5]]), nrow=length(best.ind), byrow=T)
medif.sign.mat.3.6 <- matrix(data=medif.sign.3[analysis.indices[[6]]], ncol=length(analysis.indices[[6]]), nrow=length(best.ind), byrow=T)


# Strongly Interactive Models
additive.plot.data<-t(me.dif.bias[analysis.indices[[5]],1,][,best.ind])*medif.sign.mat.1.5
additive.plot.data<-cbind(additive.plot.data, t(me.dif.bias[analysis.indices[[5]],3,][,best.ind])*medif.sign.mat.3.5)
additive.plot.data<-cbind(additive.plot.data, t(me.dif.bias[analysis.indices[[6]],1,][,best.ind])*medif.sign.mat.1.6)
additive.plot.data<-cbind(additive.plot.data, t(me.dif.bias[analysis.indices[[6]],3,][,best.ind])*medif.sign.mat.3.6)
additive.plot.vector<-as.vector(additive.plot.data)
plot.vec<-apply(X=additive.plot.data, FUN=median, MARGIN=1)
o<-order(plot.vec)

group.label<-factor(model.name.pretty[best.ind], levels=model.name.pretty[best.ind[o]])
additive.plot.vector.strong<-additive.plot.vector
group.label.strong<-group.label





# me difference bias levels
# Moderately Interactive Models
# look at distribution of performance within DGP categories
# counting each DGP as an observation
# the non-adaptive models

medif.sign.mat.1.2 <- matrix(data=medif.sign.1[analysis.indices[[2]]], ncol=length(analysis.indices[[2]]), nrow=length(best.ind), byrow=T)
medif.sign.mat.1.3 <- matrix(data=medif.sign.1[analysis.indices[[3]]], ncol=length(analysis.indices[[3]]), nrow=length(best.ind), byrow=T)
medif.sign.mat.1.4 <- matrix(data=medif.sign.1[analysis.indices[[4]]], ncol=length(analysis.indices[[4]]), nrow=length(best.ind), byrow=T)

medif.sign.mat.3.2 <- matrix(data=medif.sign.3[analysis.indices[[2]]], ncol=length(analysis.indices[[2]]), nrow=length(best.ind), byrow=T)
medif.sign.mat.3.3 <- matrix(data=medif.sign.3[analysis.indices[[3]]], ncol=length(analysis.indices[[3]]), nrow=length(best.ind), byrow=T)
medif.sign.mat.3.4 <- matrix(data=medif.sign.3[analysis.indices[[4]]], ncol=length(analysis.indices[[4]]), nrow=length(best.ind), byrow=T)


additive.plot.data<-t(me.dif.bias[analysis.indices[[2]],1,][,best.ind])*medif.sign.mat.1.2
additive.plot.data<-cbind(additive.plot.data, t(me.dif.bias[analysis.indices[[2]],3,][,best.ind])*medif.sign.mat.3.2)
additive.plot.data<-cbind(additive.plot.data, t(me.dif.bias[analysis.indices[[3]],1,][,best.ind])*medif.sign.mat.1.3)
additive.plot.data<-cbind(additive.plot.data, t(me.dif.bias[analysis.indices[[3]],3,][,best.ind])*medif.sign.mat.3.3)
additive.plot.data<-cbind(additive.plot.data, t(me.dif.bias[analysis.indices[[4]],1,][,best.ind])*medif.sign.mat.1.4)
additive.plot.data<-cbind(additive.plot.data, t(me.dif.bias[analysis.indices[[4]],3,][,best.ind])*medif.sign.mat.3.4)
additive.plot.vector<-as.vector(additive.plot.data)
plot.vec<-apply(X=additive.plot.data, FUN=median, MARGIN=1)
o<-order(plot.vec)

group.label<-factor(model.name.pretty[best.ind], levels=model.name.pretty[best.ind[o]])
additive.plot.vector.weak<-additive.plot.vector
group.label.weak<-group.label


# me difference bias levels
# Additive Models
# look at distribution of performance within DGP categories
# counting each DGP as an observation

medif.sign.mat.1.1 <- matrix(data=medif.sign.1[analysis.indices[[1]]], ncol=length(analysis.indices[[1]]), nrow=length(best.ind), byrow=T)
medif.sign.mat.3.1 <- matrix(data=medif.sign.3[analysis.indices[[1]]], ncol=length(analysis.indices[[1]]), nrow=length(best.ind), byrow=T)


additive.plot.data<-t(me.dif.bias[analysis.indices[[1]],1,][,best.ind])*medif.sign.mat.1.1
additive.plot.data<-cbind(additive.plot.data, t(me.dif.bias[analysis.indices[[1]],3,][,best.ind])*medif.sign.mat.3.1)
additive.plot.vector<-as.vector(additive.plot.data)
plot.vec<-apply(X=additive.plot.data, FUN=median, MARGIN=1)
o<-order(plot.vec)

group.label<-factor(model.name.pretty[best.ind], levels=model.name.pretty[best.ind[o]])
additive.plot.vector.add<-additive.plot.vector
group.label.add<-group.label



# comprehensive plot
# first, order the elements
additive.plot.data<-t(me.dif.bias[1:115,1,][,best.ind])
additive.plot.data<-cbind(additive.plot.data, t(me.dif.bias[,3,][,best.ind]))
additive.plot.vector<-as.vector(additive.plot.data)
plot.vec<-apply(X=additive.plot.data, FUN=median, MARGIN=1)
o<-order(plot.vec)


# Auxiliary plot (not used in paper) 
# signed error in min-max signed ME difference for models with 
additive.plot.vector<-c(additive.plot.vector.add, additive.plot.vector.weak, additive.plot.vector.strong)
group.label<-factor(c(rep(as.character(group.label.add), 49), rep(as.character(group.label.weak), 42), rep(as.character(group.label.strong), 24)), levels=model.name.pretty[best.ind[o]])
strength<-factor(c(rep("additive", length(additive.plot.vector.add)), rep("weak interaction", length(additive.plot.vector.weak)), rep("strong interaction", length(additive.plot.vector.strong))), levels=c("additive", "weak interaction", "strong interaction"))
df<-data.frame(additive.plot.vector, group=group.label, strength)
p<-ggplot(df)
p + geom_boxplot(aes(strength, additive.plot.vector)) +  coord_flip(ylim = c(-0.28, 0.28)) +  ylab("Average Error in Estimate of Min-Max Marginal Effect Difference") + xlab("") + ggtitle("") + theme(plot.title = element_text(size=16, face="bold")) + facet_grid(group~.) + theme(strip.text.y = element_text(angle=0, size=14) )+ theme(axis.text = element_text(size=10, color="black"), axis.title = element_text(size=14), legend.text = element_text(size=14), legend.title = element_text(size=14))

# Figure S-9
dev.copy2pdf(file="me-diff-strength.pdf")









# analysis of model dependence / specification biases
#######################################################

# illustrate model dependence
library(foreign)
rm(list=ls())
source(file="trupry_2013-8-18.r")

# Plot bias in the predictions of the marginal effect,
# dPr(y)/dx

me<-function(x1,x2,dgp,mod){
  
  x1.1<-x1+0.01 
  x1.2<-x1-0.01
  x2.1<-x2
  x2.2<-x2
  
  pred.dat.1<-data.frame(x1=x1.1, x2=x2.1, x1x2=x1.1*x2.1, x1sq=x1.1^2, x2sq=x2.1^2 ,x1sqx2=(x1.1^2)*x2.1, x2sqx1=(x2.1^2)*x1.1, x1sqx2sq=(x1.1^2)*(x2.1^2))
  est.1<-predict(mod, newdata=pred.dat.1, type="response")
  
  pred.dat.2<-data.frame(x1=x1.2, x2=x2.2, x1x2=x1.2*x2.2, x1sq=x1.2^2, x2sq=x2.2^2 ,x1sqx2=(x1.2^2)*x2.2, x2sqx1=(x2.2^2)*x1.2, x1sqx2sq=(x1.2^2)*(x2.2^2))
  est.2<-predict(mod, newdata=pred.dat.2, type="response")
  
  est.me<-(est.1-est.2)/0.02
  true.me<-as.vector((trupry(dgp,x1.1,x2.1)-trupry(dgp,x1.2,x2.2))/0.02)
  
  return(c(est.me-true.me))
  
}

# Auxiliary: Signed bias in signed ME difference for DGP #40
# using a saturated logit model

dat<-read.dta(file="sim40.dta")
glm.mod<-glm(y~x1+x2+x1x2+x1sq+x2sq+x1sqx2+x2sqx1+x1sqx2sq, family=binomial(link="logit"), data=dat)
x1.plot<-seq(0, 1, 0.1)
low<-c()
mid<-c()
high<-c()
for(i in 1:length(x1.plot)){
  
  low[i]<-me(x1.plot[i], 0, 40, glm.mod)
  mid[i]<-me(x1.plot[i], 0.5, 40, glm.mod)
  high[i]<-me(x1.plot[i], 1, 40, glm.mod)
  
}

par(mar=c(5,6,4,2))
plot(low~x1.plot, type="l", ylim=c(-0.1, 0.15), main="Bias in Marginal Effect", xlab="x (independent variable)", ylab=expression(paste("Bias in ME:  ", group("[", hat(partialdiff*Pr(y))/partialdiff*x, "]")-group("[", partialdiff*Pr(y)/partialdiff*x, "]"))), cex.lab=1.2, cex.axis=1.2, cex.main=1.2, cex.sub=1.2)
lines(mid~x1.plot, lty=2)
lines(high~x1.plot, lty=3)
legend("topright", legend=c("z = 0", "z = 0.5", "z = 1"), lty=c(1, 2, 3))

dev.copy2pdf(file="marginal-effect-model-dependence-40.pdf")


# plot bias in predictions of Pr(Y)

pr.bias<-function(x1,x2,dgp,mod){
  
  pred.dat.1<-data.frame(x1=x1, x2=x2, x1x2=x1*x2, x1sq=x1^2, x2sq=x2^2 ,x1sqx2=(x1^2)*x2, x2sqx1=(x2^2)*x1, x1sqx2sq=(x1^2)*(x2^2))
  est.1<-predict(mod, newdata=pred.dat.1, type="response")
  
  true<-as.vector(trupry(dgp,x1,x2))
  
  return(c(est.1-true))
  
}

# Auxiliary: Signed bias in predicted probabilities for DGP #40
# using a saturated logit model

low.pr<-c()
mid.pr<-c()
high.pr<-c()
for(i in 1:length(x1.plot)){
  
  low.pr[i]<-pr.bias(x1.plot[i], 0, 40, glm.mod)
  mid.pr[i]<-pr.bias(x1.plot[i], 0.5, 40, glm.mod)
  high.pr[i]<-pr.bias(x1.plot[i], 1, 40, glm.mod)
  
}

par(mar=c(5,6,4,2))
plot(low.pr~x1.plot, type="l", ylim=c(-0.1, 0.15), main="Bias in Predicted Probabilities", xlab="x (independent variable)", ylab=expression(paste("Bias in Predicted Probability: ", group("[", hat(Pr(y)), "]") - group("[", Pr(y), "]"))) , cex.lab=1.2, cex.axis=1.2, cex.main=1.2, cex.sub=1.2)
lines(mid.pr~x1.plot, lty=2)
lines(high.pr~x1.plot, lty=3)
legend("topright", legend=c("z = 0", "z = 0.5", "z = 1"), lty=c(1, 2, 3))

dev.copy2pdf(file="pry-effect-model-dependence-40.pdf")





# Auxiliary: Signed bias in signed ME difference for DGP #66
# using a saturated logit model

dat<-read.dta(file="sim66.dta")
glm.mod<-glm(y~x1+x2+x1x2+x1sq+x2sq+x1sqx2+x2sqx1+x1sqx2sq, family=binomial(link="logit"), data=dat)
x1.plot<-seq(0, 1, 0.1)
low<-c()
mid<-c()
high<-c()
for(i in 1:length(x1.plot)){
  
  low[i]<-me(x1.plot[i], 0, 66, glm.mod)
  mid[i]<-me(x1.plot[i], 0.5, 66, glm.mod)
  high[i]<-me(x1.plot[i], 1, 66, glm.mod)
  
}

par(mar=c(5,6,4,2))
plot(low~x1.plot, type="l", ylim=c(-0.1, 0.35), main="Bias in Marginal Effect", xlab="x (independent variable)", ylab=expression(paste("Bias in ME:  ", group("[", hat(partialdiff*Pr(y))/partialdiff*x, "]")-group("[", partialdiff*Pr(y)/partialdiff*x, "]"))), cex.lab=1.2, cex.axis=1.2, cex.main=1.2, cex.sub=1.2)
lines(mid~x1.plot, lty=2)
lines(high~x1.plot, lty=3)
legend("topright", legend=c("z = 0", "z = 0.5", "z = 1"), lty=c(1, 2, 3))

dev.copy2pdf(file="marginal-effect-model-dependence-66.pdf")


# Auxiliary: Signed bias in predicted probability for DGP #66
# using a saturated logit model

low.pr<-c()
mid.pr<-c()
high.pr<-c()
for(i in 1:length(x1.plot)){
  
  low.pr[i]<-pr.bias(x1.plot[i], 0, 66, glm.mod)
  mid.pr[i]<-pr.bias(x1.plot[i], 0.5, 66, glm.mod)
  high.pr[i]<-pr.bias(x1.plot[i], 1, 66, glm.mod)
  
}

par(mar=c(5,6,4,2))
plot(low.pr~x1.plot, type="l", ylim=c(-0.1, 0.15), main="Bias in Predicted Probabilities", xlab="x (independent variable)", ylab=expression(paste("Bias in Predicted Probability: ", group("[", hat(Pr(y)), "]") - group("[", Pr(y), "]"))), cex.lab=1.2, cex.axis=1.2, cex.main=1.2, cex.sub=1.2 )
lines(mid.pr~x1.plot, lty=2)
lines(high.pr~x1.plot, lty=3)
legend("topright", legend=c("z = 0", "z = 0.5", "z = 1"), lty=c(1, 2, 3))

dev.copy2pdf(file="pry-effect-model-dependence-66.pdf")







me.dif<-function(x1,dgp,mod){
  
  x1.1<-x1+0.01 
  x1.2<-x1-0.01
  x2.1<-0
  x2.2<-0
  
  pred.dat.1<-data.frame(x1=x1.1, x2=x2.1, x1x2=x1.1*x2.1, x1sq=x1.1^2, x2sq=x2.1^2 ,x1sqx2=(x1.1^2)*x2.1, x2sqx1=(x2.1^2)*x1.1, x1sqx2sq=(x1.1^2)*(x2.1^2))
  est.1<-predict(mod, newdata=pred.dat.1, type="response")
  
  pred.dat.2<-data.frame(x1=x1.2, x2=x2.2, x1x2=x1.2*x2.2, x1sq=x1.2^2, x2sq=x2.2^2 ,x1sqx2=(x1.2^2)*x2.2, x2sqx1=(x2.2^2)*x1.2, x1sqx2sq=(x1.2^2)*(x2.2^2))
  est.2<-predict(mod, newdata=pred.dat.2, type="response")
  
  est.me.1<-(est.1-est.2)/0.02
  true.me.1<-as.vector((trupry(dgp,x1.1,x2.1)-trupry(dgp,x1.2,x2.2))/0.02)
  
  
  x1.1<-x1+0.01 
  x1.2<-x1-0.01
  x2.1<-1
  x2.2<-1
  
  pred.dat.1<-data.frame(x1=x1.1, x2=x2.1, x1x2=x1.1*x2.1, x1sq=x1.1^2, x2sq=x2.1^2 ,x1sqx2=(x1.1^2)*x2.1, x2sqx1=(x2.1^2)*x1.1, x1sqx2sq=(x1.1^2)*(x2.1^2))
  est.1<-predict(mod, newdata=pred.dat.1, type="response")
  
  pred.dat.2<-data.frame(x1=x1.2, x2=x2.2, x1x2=x1.2*x2.2, x1sq=x1.2^2, x2sq=x2.2^2 ,x1sqx2=(x1.2^2)*x2.2, x2sqx1=(x2.2^2)*x1.2, x1sqx2sq=(x1.2^2)*(x2.2^2))
  est.2<-predict(mod, newdata=pred.dat.2, type="response")
  
  est.me.2<-(est.1-est.2)/0.02
  true.me.2<-as.vector((trupry(dgp,x1.1,x2.1)-trupry(dgp,x1.2,x2.2))/0.02)
  
  est.me.dif <- est.me.2 - est.me.1
  true.me.dif <- true.me.2 - true.me.1
  
  
  return(c(est.me.dif-true.me.dif))
  
}

dat<-read.dta(file="sim40.dta")
glm.mod<-glm(y~x1+x2+x1x2+x1sq+x2sq+x1sqx2+x2sqx1+x1sqx2sq, family=binomial(link="logit"), data=dat)
x1.plot<-seq(0, 1, 0.1)
dif<-c()
for(i in 1:length(x1.plot)){
  
  dif[i]<-me.dif(x1.plot[i], 40, glm.mod)
  
}

# Auxiliary: signed bias in signed min-max ME difference for a saturated logit model of DGP #40

par(mar=c(5,6,4,2))
plot(-dif~x1.plot, type="l", main="Bias in Marginal Effect Difference", xlab="x (independent variable)", ylab=expression(paste("Bias in ME Difference:  ", group("[", hat(Delta)*ME["min-max"], "]")-group("[", Delta*ME["min-max"], "]"))), cex.lab=1.1, cex.axis=1.1, cex.main=1.1, cex.sub=1.1)
dev.copy2pdf(file="me-dif-model-dependence-40.pdf")




dat<-read.dta(file="sim66.dta")
glm.mod<-glm(y~x1+x2+x1x2+x1sq+x2sq+x1sqx2+x2sqx1+x1sqx2sq, family=binomial(link="logit"), data=dat)
x1.plot<-seq(0, 1, 0.1)
dif<-c()
for(i in 1:length(x1.plot)){
  
  dif[i]<-me.dif(x1.plot[i], 66, glm.mod)
  
}

# Auxiliary: signed bias in signed min-max ME difference for a saturated logit model of DGP #66
par(mar=c(5,6,4,2))
plot(-dif~x1.plot, type="l", main="Bias in Marginal Effect Difference", xlab="x (independent variable)", ylab=expression(paste("Bias in ME Difference:  ", group("[", hat(Delta)*ME["min-max"], "]")-group("[", Delta*ME["min-max"], "]"))), cex.lab=1.1, cex.axis=1.1, cex.main=1.1, cex.sub=1.1)
dev.copy2pdf(file="me-dif-model-dependence-66.pdf")








# illustrate model dependence
library(foreign)
rm(list=ls())
source(file="trupry_2013-8-18.r")

set.seed(123456)

# Plot bias in the predictions of the marginal effect,
# dPr(y)/dx

me<-function(x1,x2,dgp,mod){
  
  x1.1<-x1+0.01 
  x1.2<-x1-0.01
  x2.1<-x2
  x2.2<-x2
  
  pred.dat.1<-data.frame(x1=x1.1, x2=x2.1, x1x2=x1.1*x2.1, x1sq=x1.1^2, x2sq=x2.1^2 ,x1sqx2=(x1.1^2)*x2.1, x2sqx1=(x2.1^2)*x1.1, x1sqx2sq=(x1.1^2)*(x2.1^2))
  est.1<-predict(mod, newdata=pred.dat.1, type="response")
  
  pred.dat.2<-data.frame(x1=x1.2, x2=x2.2, x1x2=x1.2*x2.2, x1sq=x1.2^2, x2sq=x2.2^2 ,x1sqx2=(x1.2^2)*x2.2, x2sqx1=(x2.2^2)*x1.2, x1sqx2sq=(x1.2^2)*(x2.2^2))
  est.2<-predict(mod, newdata=pred.dat.2, type="response")
  
  est.me<-(est.1-est.2)/0.02
  true.me<-as.vector((trupry(dgp,x1.1,x2.1)-trupry(dgp,x1.2,x2.2))/0.02)
  
  return(c(est.me-true.me))
  
}

x1.plot<-seq(0, 1, 0.1)
me.bias.mat<-matrix(data=NA, nrow=115, ncol=length(x1.plot))

pb<-txtProgressBar(min=1, max=115, initial=0, style=3)
for(j in 1:115){
  
  setTxtProgressBar(pb, value=j)
  if(j<10){dat<-read.dta(file=paste("sim0",j,".dta", sep = ""))}
  else{dat<-read.dta(file=paste("sim",j,".dta", sep = ""))}
  glm.mod<-glm(y~x1+x2+x1x2+x1sq+x2sq+x1sqx2+x2sqx1+x1sqx2sq, family=binomial(link="logit"), data=dat)
  for(i in 1:length(x1.plot)){
    me.bias.mat[j,i]<-me(x1.plot[i], 0.5, j, glm.mod)
  }
  
  
}
close(pb)

continuous.additive<-1:49
min.max.secdif.10<-50:61
min.max.secdif.20<-62:73
min.max.secdif.30<-74:91
min.max.secdif.40<-92:103
min.max.secdif.50<-104:115
analysis.indices<-list(continuous.additive, min.max.secdif.10, min.max.secdif.20, min.max.secdif.30, min.max.secdif.40, min.max.secdif.50)

#analysis.names<-c("additive", "second difference = 0.1", "second difference = 0.2", "second difference = 0.3", "second difference = 0.4", "second difference = 0.5")

analysis.names<-c("additive", expression(abs(Delta*Delta["min-max"]) == 0.1), expression(abs(Delta*Delta["min-max"]) == 0.2), expression(abs(Delta*Delta["min-max"]) == 0.3), expression(abs(Delta*Delta["min-max"]) == 0.4), expression(abs(Delta*Delta["min-max"]) == 0.5))


me.bias.vec<-as.vector(me.bias.mat)
dgp<-rep(1:115, length(x1.plot))  #, each=length(x1.plot))
loc<-factor(paste("X = ", rep(x1.plot, each=115)), levels=paste( "X = ", x1.plot))

dgp.type<-c()
for(i in 1:length(analysis.indices)){
  dgp.type[which(dgp%in%analysis.indices[[i]])]<-analysis.names[i]
}
dgp.type<-factor(dgp.type, levels=analysis.names)

# Figure 7: Model Dependence in Inferences about marginal effect estimates
# difference between estimated and true value of ME estimate for targeted values of X
ylabs <- parse(text=analysis.names)

library(ggplot2)
library(grid)
df<-data.frame(me.bias.vec, dgp=factor(dgp, levels=1:115), loc=loc, dgp.type=dgp.type)
p<-ggplot(df)
p <- p + geom_jitter(aes(me.bias.vec, dgp.type, colour = dgp.type)) + scale_y_discrete(labels=ylabs) + facet_grid(loc~.) + ylab("DGP Type") + xlab("Bias in Marginal Effect Estimate") + theme(legend.position = "none", axis.title.y = element_text(vjust=3.2), axis.title.x = element_text(vjust=0.1), strip.text.y = element_text(angle=270, size=14) )+ theme(axis.text = element_text(size=12, color="black"), axis.title = element_text(size=14), legend.text = element_text(size=14), legend.title = element_text(size=14)) + scale_colour_grey(start = 0.6, end = 0.2) + theme(plot.margin = unit(c(0.2,1,1,1), "cm")) 
suppressWarnings(print(p))

dev.copy2pdf(file="asymptotic_me_bias.pdf")

idx <- which(loc=="X =  0.5" | loc=="X =  0.6"| loc=="X =  0.7" | loc=="X =  0.4"| loc=="X =  0.3")
sum(abs(me.bias.vec[idx])>=0.05)

idx <- which(loc=="X =  0.5")
sum(abs(me.bias.vec[idx])>=0.05)

idx <- which(loc=="X =  0.4")
sum(abs(me.bias.vec[idx])>=0.05)

idx <- which(loc=="X =  0.3")
sum(abs(me.bias.vec[idx])>=0.05)

idx <- which(loc=="X =  0.6")
sum(abs(me.bias.vec[idx])>=0.05)

idx <- which(loc=="X =  0.7")
sum(abs(me.bias.vec[idx])>=0.05)


###########################################
# 
# Part Four: Size/Power Analysis
#
###########################################


###########################################
# size/power analysis for continuous DGPs
# N = 1000
###########################################

rm(list=ls())
library(foreign)
set.seed(654321)

significance<-function(mu, sigma){
  
  mat.1<-matrix(dat=c(1,1,0,0))
  mat.2<-matrix(dat=c(1,0,0,0))
  mat.3<-matrix(dat=c(1,1,1,1))
  mat.4<-matrix(dat=c(1,0,1,0))
  
  require(mvtnorm)
  samples<-rmvnorm(1000, mu, sigma)
  effect.1<-samples%*%mat.1
  effect.2<-samples%*%mat.2
  effect.3<-samples%*%mat.3
  effect.4<-samples%*%mat.4
  
  dif.low<-effect.1-effect.2
  dif.high<-effect.3-effect.4
  
  dif.dist<-dif.high-dif.low
  dif.quant<-quantile(dif.dist, probs=c(0.975, 0.025))
  
  ifelse(sign(dif.quant[1])==sign(dif.quant[2]), return(1), return(0))
  
}

sig.mat<-matrix(data=NA, nrow=115, ncol=100)
sig.product<-matrix(data=NA, nrow=115, ncol=100)
sig.mat.2<-matrix(data=NA, nrow=115, ncol=100)
sig.product.2<-matrix(data=NA, nrow=115, ncol=100)


pb<-txtProgressBar(initial=0, min=1, max=115, style=3)
for(i in 1:115){
  
  setTxtProgressBar(pb, value=i)
  if(i<10){master.dat<-read.dta(file=paste("sim0",i,".dta", sep = ""))
  }else{master.dat<-read.dta(file=paste("sim",i,".dta", sep = ""))}
  
  
  for(j in 1:100){
    
    baby.dat<-master.dat[(1+1000*(j-1)):(1000*j),]
    model<-glm(y~x1+x2+x1x2, family=binomial(link=probit), data=baby.dat)
    sig.product[i,j]<-ifelse(coefficients(summary(model))[4,4]<0.05, 1, 0)
    sig.mat[i,j]<-significance(coefficients(model), vcov(model))

    model.2<-glm(y~x1+x2+x1x2, family=binomial(link=logit), data=baby.dat)
    sig.product.2[i,j]<-ifelse(coefficients(summary(model.2))[4,4]<0.05, 1, 0)
    sig.mat.2[i,j]<-significance(coefficients(model.2), vcov(model.2))
    
  }
  
  
}
close(pb)

continuous.additive<-1:49
min.max.secdif.10<-50:61
min.max.secdif.20<-62:73
min.max.secdif.30<-74:91
min.max.secdif.40<-92:103
min.max.secdif.50<-104:115

power.secdif<-c()

power.secdif[1]<-100*mean(apply(X=sig.mat[min.max.secdif.10,], FUN=mean, MARGIN=1))
power.secdif[2]<-100*mean(apply(X=sig.mat[min.max.secdif.20,], FUN=mean, MARGIN=1))
power.secdif[3]<-100*mean(apply(X=sig.mat[min.max.secdif.30,], FUN=mean, MARGIN=1))
power.secdif[4]<-100*mean(apply(X=sig.mat[min.max.secdif.40,], FUN=mean, MARGIN=1))
power.secdif[5]<-100*mean(apply(X=sig.mat[min.max.secdif.50,], FUN=mean, MARGIN=1))

# Figure 6(a): Power Assessment of Min-Max Second Difference
par(mar=c(5,5,2,2))
second.difference<-c(0.1, 0.2, 0.3, 0.4, 0.5)
plot(power.secdif~second.difference, type="l", ylim=c(-3, 82), xlab=expression(paste("true ", abs(Delta*Delta[" min-max "]))), ylab=expression(paste("% of Data Sets in which   ", abs(Delta*Delta[" min-max "]), "   is Stat. Signif.")), cex.lab=1.2, cex.axis=1.2, cex.main=1.2, cex.sub=1.2)
abline(h=5, lty=3)
dev.copy2pdf(file="minmax-secdif-power.pdf")

# Figure 5(a): Size Assessment of Min-Max Second Difference
secdif.size<-100*apply(X=sig.mat[continuous.additive,], FUN=mean, MARGIN=1)
max(secdif.size)
boxplot(secdif.size, xlab="", ylab=expression(paste("% of Data Sets in which   ", abs(Delta*Delta[" min-max "]), "   is Stat. Signif.")), cex.lab=1.2, cex.axis=1.2, cex.main=1.2, cex.sub=1.2)
abline(h=5, lty=3)
dev.copy2pdf(file="minmax-secdif-size.pdf")


power.product<-c()

power.product[1]<-100*mean(apply(X=sig.product[min.max.secdif.10,], FUN=mean, MARGIN=1))
power.product[2]<-100*mean(apply(X=sig.product[min.max.secdif.20,], FUN=mean, MARGIN=1))
power.product[3]<-100*mean(apply(X=sig.product[min.max.secdif.30,], FUN=mean, MARGIN=1))
power.product[4]<-100*mean(apply(X=sig.product[min.max.secdif.40,], FUN=mean, MARGIN=1))
power.product[5]<-100*mean(apply(X=sig.product[min.max.secdif.50,], FUN=mean, MARGIN=1))


# Figure 6(b): Power Assessment of Product Term Coefficient
par(mar=c(5,5,2,2))
second.difference<-c(0.1, 0.2, 0.3, 0.4, 0.5)
plot(power.product~second.difference, type="l", ylim=c(-3, 82), xlab=expression(paste("true ", abs(Delta*Delta[" min-max "]))), ylab=expression(paste("% of Data Sets in which the Product Term is Stat. Signif.")), cex.lab=1.2, cex.axis=1.2, cex.main=1.2, cex.sub=1.2)
abline(h=5, lty=3)
dev.copy2pdf(file="product-power.pdf")

100 - max(power.product[5])

# Figure 5(b): Size Assessment of Product Term Coefficient
product.size<-100*apply(X=sig.product[continuous.additive,], FUN=mean, MARGIN=1)
max(product.size)
boxplot(product.size, xlab="", ylab="% of Data Sets in which the Product Term is Stat. Signif.", cex.lab=1.2, cex.axis=1.2, cex.main=1.2, cex.sub=1.2)
abline(h=5, lty=3)
dev.copy2pdf(file="product-size.pdf")





# Auxiliary plot (not used in paper): 
# Repetition of the above analysis with logit model rather than probit model
power.secdif<-c()

power.secdif[1]<-100*mean(apply(X=sig.mat.2[min.max.secdif.10,], FUN=mean, MARGIN=1))
power.secdif[2]<-100*mean(apply(X=sig.mat.2[min.max.secdif.20,], FUN=mean, MARGIN=1))
power.secdif[3]<-100*mean(apply(X=sig.mat.2[min.max.secdif.30,], FUN=mean, MARGIN=1))
power.secdif[4]<-100*mean(apply(X=sig.mat.2[min.max.secdif.40,], FUN=mean, MARGIN=1))
power.secdif[5]<-100*mean(apply(X=sig.mat.2[min.max.secdif.50,], FUN=mean, MARGIN=1))

# Power Assessment of Min-Max Second Difference
par(mar=c(5,5,2,2))
second.difference<-c(0.1, 0.2, 0.3, 0.4, 0.5)
plot(power.secdif~second.difference, type="l", ylim=c(-3, 82), xlab=expression(paste("true ", abs(Delta*Delta[" min-max "]))), ylab=expression(paste("% of Data Sets in which   ", abs(Delta*Delta[" min-max "]), "   is Stat. Signif.")), cex.lab=1.2, cex.axis=1.2, cex.main=1.2, cex.sub=1.2)
abline(h=5, lty=3)

# Figure S-14(a)
dev.copy2pdf(file="minmax-secdif-power-logit.pdf")

# Size Assessment of Min-Max Second Difference
secdif.size<-100*apply(X=sig.mat.2[continuous.additive,], FUN=mean, MARGIN=1)
boxplot(secdif.size, xlab="", ylab=expression(paste("% of Data Sets in which   ", abs(Delta*Delta[" min-max "]), "   is Stat. Signif.")), cex.lab=1.2, cex.axis=1.2, cex.main=1.2, cex.sub=1.2)
abline(h=5, lty=3)

# Figure S-13(a)
dev.copy2pdf(file="minmax-secdif-size-logit.pdf")




power.product<-c()

power.product[1]<-100*mean(apply(X=sig.product.2[min.max.secdif.10,], FUN=mean, MARGIN=1))
power.product[2]<-100*mean(apply(X=sig.product.2[min.max.secdif.20,], FUN=mean, MARGIN=1))
power.product[3]<-100*mean(apply(X=sig.product.2[min.max.secdif.30,], FUN=mean, MARGIN=1))
power.product[4]<-100*mean(apply(X=sig.product.2[min.max.secdif.40,], FUN=mean, MARGIN=1))
power.product[5]<-100*mean(apply(X=sig.product.2[min.max.secdif.50,], FUN=mean, MARGIN=1))

# Power Assessment of Product Term Coefficient
par(mar=c(5,5,2,2))
second.difference<-c(0.1, 0.2, 0.3, 0.4, 0.5)
plot(power.product~second.difference, type="l", ylim=c(-3, 82), xlab=expression(paste("true ", abs(Delta*Delta[" min-max "]))), ylab=expression(paste("% of Data Sets in which the Product Term is Stat. Signif.")), cex.lab=1.2, cex.axis=1.2, cex.main=1.2, cex.sub=1.2)
abline(h=5, lty=3)

# Figure S-14(b)
dev.copy2pdf(file="product-power-logit.pdf")

# Size Assessment of Product Term Coefficient
product.size<-100*apply(X=sig.product.2[continuous.additive,], FUN=mean, MARGIN=1)
boxplot(product.size, xlab="", ylab="% of Data Sets in which the Product Term is Stat. Signif.", cex.lab=1.2, cex.axis=1.2, cex.main=1.2, cex.sub=1.2)
abline(h=5, lty=3)

# Figure S-13(b)
dev.copy2pdf(file="product-size-logit.pdf")



###########################################
# size/power analysis for continuous DGPs
# using min-max marginal effects differences
# N = 1000
###########################################

rm(list=ls())
set.seed(123456)
library(foreign)
library(mvtnorm)
library(snow)

# start up cluster
NODES<-10 #number of cluster nodes to establish
cl <- makeCluster(rep("localhost", NODES), type = "SOCK")
clusterSetupRNG (cl, type = "RNGstream", seed=rep(7564, 6))

invisible(clusterEvalQ(cl, library(VGAM)))


me<-function(X, beta, dir, form, link.name){
  
  x1<-X[,1]
  x2<-X[,2]
  
  if(dir=="x1"){
    x1.1<-x1+0.01 
    x1.2<-x1-0.01
    x2.1<-x2
    x2.2<-x2
    
    predict.dat.1<-as.data.frame(cbind(x1.1,x2.1))
    names(predict.dat.1)<-c("x1","x2")
    predict.dat.2<-as.data.frame(cbind(x1.2,x2.2))
    names(predict.dat.2)<-c("x1","x2")
    
    mm1<-model.matrix(form,predict.dat.1)
    mm2<-model.matrix(form,predict.dat.2)
    
    link.fun<-binomial(link = link.name)$linkinv
    
    xb1<-mm1%*%beta
    pry1<-link.fun(xb1)
    
    xb2<-mm2%*%beta
    pry2<-link.fun(xb2)
    
    return(as.vector((pry1-pry2)/0.02))
    
  }
  
  if(dir=="x2"){
    x1.1<-x1
    x1.2<-x1
    x2.1<-x2+0.01
    x2.2<-x2-0.01
    
    predict.dat.1<-as.data.frame(cbind(x1.1,x2.1))
    names(predict.dat.1)<-c("x1","x2")
    predict.dat.2<-as.data.frame(cbind(x1.2,x2.2))
    names(predict.dat.2)<-c("x1","x2")
    
    mm1<-model.matrix(form,predict.dat.1)
    mm2<-model.matrix(form,predict.dat.2)
    
    link.fun<-binomial(link = link.name)$linkinv
    
    xb1<-mm1%*%beta
    pry1<-link.fun(xb1)
    
    xb2<-mm2%*%beta
    pry2<-link.fun(xb2)
    
    return(as.vector((pry1-pry2)/0.02))
    
  }
  
}


clusterExport(cl, list="me")

me.dif<-function(beta, form, link.name){
  
  x1.1<-c(.5,.5)
  x2.1<-c(1,.75)
  X.1<-cbind(x1.1,x2.1)
  
  x1.2<-c(.5,.5)
  x2.2<-c(0,.25)
  X.2<-cbind(x1.2,x2.2)
  
  me.1.1<-me(X.1, beta=beta, dir="x1", form, link.name)
  me.2.1<-me(X.2, beta=beta, dir="x1", form, link.name)
  
  x1.1<-c(1,.75)
  x2.1<-c(.5,.5)
  X.1<-cbind(x1.1,x2.1)
  
  x1.2<-c(0,.25)
  x2.2<-c(.5,.5)
  X.2<-cbind(x1.2,x2.2)
  
  me.1.2<-me(X.1, beta=beta, dir="x2", form, link.name)
  me.2.2<-me(X.2, beta=beta, dir="x2", form, link.name)
  
  me.1<-c(me.1.1, me.1.2)
  me.2<-c(me.2.1, me.2.2)
  
  return(me.1-me.2)
  
}


clusterExport(cl, list="me.dif")


sig.mat<-matrix(data=NA, nrow=115, ncol=100)
sig.product<-matrix(data=NA, nrow=115, ncol=100)
sig.mat.2<-matrix(data=NA, nrow=115, ncol=100)
sig.product.2<-matrix(data=NA, nrow=115, ncol=100)

pb<-txtProgressBar(initial=0, min=1, max=115, style=3)
for(i in 1:115){
  
  setTxtProgressBar(pb, value=i)
  if(i<10){master.dat<-read.dta(file=paste("sim0",i,".dta", sep = ""))
  }else{master.dat<-read.dta(file=paste("sim",i,".dta", sep = ""))}
  
  
  for(j in 1:100){
    
    baby.dat<-master.dat[(1+1000*(j-1)):(1000*j),]
    model<-glm(y~x1+x2+x1x2, family=binomial(link=probit), data=baby.dat)
    samples<-rmvnorm(1000, coefficients(model), vcov(model))
    ci.draws<-parApply(cl, X=samples, FUN=me.dif, MARGIN=1, form = ~x1+x2+I(x1*x2), link="probit")
    ci.bounds<-quantile(ci.draws[1,], probs=c(0.025, 0.975))
    sig.mat[i,j]<-ifelse(sign(ci.bounds[1]) == sign(ci.bounds[2]), 1, 0)
    
    model.2<-glm(y~x1+x2+x1x2, family=binomial(link=logit), data=baby.dat)
    samples.2<-rmvnorm(1000, coefficients(model.2), vcov(model.2))
    ci.draws.2<-parApply(cl, X=samples.2, FUN=me.dif, MARGIN=1, form = ~x1+x2+I(x1*x2), link="logit")
    ci.bounds.2<-quantile(ci.draws.2[1,], probs=c(0.025, 0.975))
    sig.mat.2[i,j]<-ifelse(sign(ci.bounds.2[1]) == sign(ci.bounds.2[2]), 1, 0)
    
  }
  
  
}
close(pb)
stopCluster(cl)

save.image("me-power.RData")

continuous.additive<-1:49
min.max.secdif.10<-50:61
min.max.secdif.20<-62:73
min.max.secdif.30<-74:91
min.max.secdif.40<-92:103
min.max.secdif.50<-104:115

power.secdif<-c()

power.secdif[1]<-100*mean(apply(X=sig.mat[min.max.secdif.10,], FUN=mean, MARGIN=1))
power.secdif[2]<-100*mean(apply(X=sig.mat[min.max.secdif.20,], FUN=mean, MARGIN=1))
power.secdif[3]<-100*mean(apply(X=sig.mat[min.max.secdif.30,], FUN=mean, MARGIN=1))
power.secdif[4]<-100*mean(apply(X=sig.mat[min.max.secdif.40,], FUN=mean, MARGIN=1))
power.secdif[5]<-100*mean(apply(X=sig.mat[min.max.secdif.50,], FUN=mean, MARGIN=1))

par(mar=c(5,5,2,2))
second.difference<-c(0.1, 0.2, 0.3, 0.4, 0.5)
plot(power.secdif~second.difference, type="l", xlab=expression(paste("true ", abs(Delta*Delta[" min-max "]))), ylab=expression(paste("% of Data Sets in which   ", abs(Delta*"ME"[" min-max "]), "   is Stat. Signif.")), cex.lab=1.2, cex.axis=1.2, cex.main=1.2, cex.sub=1.2, ylim=c(0, 90))
abline(h=5, lty=3)

# Figure S-12(b)
dev.copy2pdf(file="minmax-me-power.pdf")

secdif.size<-100*apply(X=sig.mat[continuous.additive,], FUN=mean, MARGIN=1)
boxplot(secdif.size, xlab=expression(paste("true  ", abs(Delta*Delta[" min-max "]), " = 0")), ylab=expression(paste("% of Data Sets in which   ", abs(Delta*"ME"[" min-max "]), "   is Stat. Signif.")), cex.lab=1.2, cex.axis=1.2, cex.main=1.2, cex.sub=1.2)
abline(h=5, lty=3)

# Figure S-12(a)
dev.copy2pdf(file="minmax-me-size.pdf")



# Auxiliary plot (not used in paper): 
# Repetition of the above analysis with logit model rather than probit model
power.secdif<-c()

power.secdif[1]<-100*mean(apply(X=sig.mat.2[min.max.secdif.10,], FUN=mean, MARGIN=1))
power.secdif[2]<-100*mean(apply(X=sig.mat.2[min.max.secdif.20,], FUN=mean, MARGIN=1))
power.secdif[3]<-100*mean(apply(X=sig.mat.2[min.max.secdif.30,], FUN=mean, MARGIN=1))
power.secdif[4]<-100*mean(apply(X=sig.mat.2[min.max.secdif.40,], FUN=mean, MARGIN=1))
power.secdif[5]<-100*mean(apply(X=sig.mat.2[min.max.secdif.50,], FUN=mean, MARGIN=1))

# Power Assessment of Min-Max Second Difference
par(mar=c(5,5,2,2))
second.difference<-c(0.1, 0.2, 0.3, 0.4, 0.5)
plot(power.secdif~second.difference, type="l", ylim=c(-3, 84), xlab=expression(paste("true ", abs(Delta*Delta[" min-max "]))), ylab=expression(paste("% of Data Sets in which   ", abs(Delta*"ME"[" min-max "]), "   is Stat. Signif.")), cex.lab=1.2, cex.axis=1.2, cex.main=1.2, cex.sub=1.2)
abline(h=5, lty=3)

# Figure S-15(b)
dev.copy2pdf(file="minmax-me-power-logit.pdf")

# Size Assessment of Min-Max Second Difference
secdif.size<-100*apply(X=sig.mat.2[continuous.additive,], FUN=mean, MARGIN=1)
boxplot(secdif.size, xlab=expression(paste("true  ", abs(Delta*Delta[" min-max "]), " = 0")), ylab=expression(paste("% of Data Sets in which   ", abs(Delta*"ME"[" min-max "]), "   is Stat. Signif.")), cex.lab=1.2, cex.axis=1.2, cex.main=1.2, cex.sub=1.2)
abline(h=5, lty=3)

# Figure S-15(a)
dev.copy2pdf(file="minmax-me-size-logit.pdf")














###########################################
# size/power analysis for continuous DGPs
# N = 500
###########################################

rm(list=ls())
set.seed(123456)
library(foreign)

significance<-function(mu, sigma){
  
  mat.1<-matrix(dat=c(1,1,0,0))
  mat.2<-matrix(dat=c(1,0,0,0))
  mat.3<-matrix(dat=c(1,1,1,1))
  mat.4<-matrix(dat=c(1,0,1,0))
  
  require(mvtnorm)
  samples<-rmvnorm(1000, mu, sigma)
  effect.1<-samples%*%mat.1
  effect.2<-samples%*%mat.2
  effect.3<-samples%*%mat.3
  effect.4<-samples%*%mat.4
  
  dif.low<-effect.1-effect.2
  dif.high<-effect.3-effect.4
  
  dif.dist<-dif.high-dif.low
  dif.quant<-quantile(dif.dist, probs=c(0.975, 0.025))
  
  ifelse(sign(dif.quant[1])==sign(dif.quant[2]), return(1), return(0))
  
}

sig.mat<-matrix(data=NA, nrow=115, ncol=100)
sig.product<-matrix(data=NA, nrow=115, ncol=100)

pb<-txtProgressBar(initial=0, min=1, max=115, style=3)
for(i in 1:115){
  
  setTxtProgressBar(pb, value=i)
  if(i<10){master.dat<-read.dta(file=paste("sim0",i,".dta", sep = ""))
  }else{master.dat<-read.dta(file=paste("sim",i,".dta", sep = ""))}
  
  
  for(j in 1:100){
    
    baby.dat<-master.dat[(1+500*(j-1)):(500*j),]
    model<-glm(y~x1+x2+x1x2, family=binomial(link=probit), data=baby.dat)
    sig.product[i,j]<-ifelse(coefficients(summary(model))[4,4]<0.05, 1, 0)
    
    sig.mat[i,j]<-significance(coefficients(model), vcov(model))
    
  }
  
  
}
close(pb)

continuous.additive<-1:49
min.max.secdif.10<-50:61
min.max.secdif.20<-62:73
min.max.secdif.30<-74:91
min.max.secdif.40<-92:103
min.max.secdif.50<-104:115

power.secdif<-c()

power.secdif[1]<-mean(apply(X=sig.mat[continuous.additive,], FUN=mean, MARGIN=1))
power.secdif[2]<-mean(apply(X=sig.mat[min.max.secdif.10,], FUN=mean, MARGIN=1))
power.secdif[3]<-mean(apply(X=sig.mat[min.max.secdif.20,], FUN=mean, MARGIN=1))
power.secdif[4]<-mean(apply(X=sig.mat[min.max.secdif.30,], FUN=mean, MARGIN=1))
power.secdif[5]<-mean(apply(X=sig.mat[min.max.secdif.40,], FUN=mean, MARGIN=1))
power.secdif[6]<-mean(apply(X=sig.mat[min.max.secdif.50,], FUN=mean, MARGIN=1))

par(mar=c(5,5,4,2))
second.difference<-c(0, 0.1, 0.2, 0.3, 0.4, 0.5)
plot(power.secdif~second.difference, type="l", ylim=c(-0.03, 0.82), xlab=expression(paste("true ", abs(Delta*Delta[" min-max "]))), ylab=expression(paste("Pr(estimated   ", abs(Delta*Delta[" min-max "]), "   is stat. significant)")))
abline(h=0.05, lty=3)
#dev.copy2pdf(file="minmax-secdif-power.pdf")

secdif.size<-apply(X=sig.mat[continuous.additive,], FUN=mean, MARGIN=1)
boxplot(secdif.size, xlab=expression(paste("true  ", abs(Delta*Delta[" min-max "]), " = 0")), ylab=expression(paste("Pr(estimated   ", abs(Delta*Delta[" min-max "]), "   is stat. significant)")))
#dev.copy2pdf(file="minmax-secdif-size.pdf")


power.product<-c()

power.product[1]<-mean(apply(X=sig.product[continuous.additive,], FUN=mean, MARGIN=1))
power.product[2]<-mean(apply(X=sig.product[min.max.secdif.10,], FUN=mean, MARGIN=1))
power.product[3]<-mean(apply(X=sig.product[min.max.secdif.20,], FUN=mean, MARGIN=1))
power.product[4]<-mean(apply(X=sig.product[min.max.secdif.30,], FUN=mean, MARGIN=1))
power.product[5]<-mean(apply(X=sig.product[min.max.secdif.40,], FUN=mean, MARGIN=1))
power.product[6]<-mean(apply(X=sig.product[min.max.secdif.50,], FUN=mean, MARGIN=1))


par(mar=c(5,5,4,2))
second.difference<-c(0, 0.1, 0.2, 0.3, 0.4, 0.5)
plot(power.product~second.difference, type="l", ylim=c(-0.03, 0.82), xlab=expression(paste("true ", abs(Delta*Delta[" min-max "]))), ylab=expression(paste("Pr(estimated product term is stat. significant)")))
abline(h=0.05, lty=3)
#dev.copy2pdf(file="product-power.pdf")

product.size<-apply(X=sig.product[continuous.additive,], FUN=mean, MARGIN=1)
boxplot(product.size, xlab=expression(paste("true  ", abs(Delta*Delta[" min-max "]), " = 0")), ylab="Pr(estimated product term is stat. significant)")
#dev.copy2pdf(file="product-size.pdf")


###########################################
# size/power analysis for continuous DGPs
# N = 100
###########################################

rm(list=ls())
set.seed(123456)
library(foreign)

significance<-function(mu, sigma){
  
  mat.1<-matrix(dat=c(1,1,0,0))
  mat.2<-matrix(dat=c(1,0,0,0))
  mat.3<-matrix(dat=c(1,1,1,1))
  mat.4<-matrix(dat=c(1,0,1,0))
  
  require(mvtnorm)
  samples<-rmvnorm(1000, mu, sigma)
  effect.1<-samples%*%mat.1
  effect.2<-samples%*%mat.2
  effect.3<-samples%*%mat.3
  effect.4<-samples%*%mat.4
  
  dif.low<-effect.1-effect.2
  dif.high<-effect.3-effect.4
  
  dif.dist<-dif.high-dif.low
  dif.quant<-quantile(dif.dist, probs=c(0.975, 0.025))
  
  ifelse(sign(dif.quant[1])==sign(dif.quant[2]), return(1), return(0))
  
}

sig.mat<-matrix(data=NA, nrow=115, ncol=100)
sig.product<-matrix(data=NA, nrow=115, ncol=100)

pb<-txtProgressBar(initial=0, min=1, max=115, style=3)
for(i in 1:115){
  
  setTxtProgressBar(pb, value=i)
  if(i<10){master.dat<-read.dta(file=paste("sim0",i,".dta", sep = ""))
  }else{master.dat<-read.dta(file=paste("sim",i,".dta", sep = ""))}
  
  
  for(j in 1:100){
    
    baby.dat<-master.dat[(1+100*(j-1)):(100*j),]
    model<-glm(y~x1+x2+x1x2, family=binomial(link=probit), data=baby.dat)
    sig.product[i,j]<-ifelse(coefficients(summary(model))[4,4]<0.05, 1, 0)
    
    sig.mat[i,j]<-significance(coefficients(model), vcov(model))
    
  }
  
  
}
close(pb)

continuous.additive<-1:49
min.max.secdif.10<-50:61
min.max.secdif.20<-62:73
min.max.secdif.30<-74:91
min.max.secdif.40<-92:103
min.max.secdif.50<-104:115

power.secdif<-c()

power.secdif[1]<-mean(apply(X=sig.mat[continuous.additive,], FUN=mean, MARGIN=1))
power.secdif[2]<-mean(apply(X=sig.mat[min.max.secdif.10,], FUN=mean, MARGIN=1))
power.secdif[3]<-mean(apply(X=sig.mat[min.max.secdif.20,], FUN=mean, MARGIN=1))
power.secdif[4]<-mean(apply(X=sig.mat[min.max.secdif.30,], FUN=mean, MARGIN=1))
power.secdif[5]<-mean(apply(X=sig.mat[min.max.secdif.40,], FUN=mean, MARGIN=1))
power.secdif[6]<-mean(apply(X=sig.mat[min.max.secdif.50,], FUN=mean, MARGIN=1))

par(mar=c(5,5,4,2))
second.difference<-c(0, 0.1, 0.2, 0.3, 0.4, 0.5)
plot(power.secdif~second.difference, type="l", ylim=c(-0.03, 0.82), xlab=expression(paste("true ", abs(Delta*Delta[" min-max "]))), ylab=expression(paste("Pr(estimated   ", abs(Delta*Delta[" min-max "]), "   is stat. significant)")))
abline(h=0.05, lty=3)
#dev.copy2pdf(file="minmax-secdif-power.pdf")

secdif.size<-apply(X=sig.mat[continuous.additive,], FUN=mean, MARGIN=1)
boxplot(secdif.size, xlab=expression(paste("true  ", abs(Delta*Delta[" min-max "]), " = 0")), ylab=expression(paste("Pr(estimated   ", abs(Delta*Delta[" min-max "]), "   is stat. significant)")))
#dev.copy2pdf(file="minmax-secdif-size.pdf")


power.product<-c()

power.product[1]<-mean(apply(X=sig.product[continuous.additive,], FUN=mean, MARGIN=1))
power.product[2]<-mean(apply(X=sig.product[min.max.secdif.10,], FUN=mean, MARGIN=1))
power.product[3]<-mean(apply(X=sig.product[min.max.secdif.20,], FUN=mean, MARGIN=1))
power.product[4]<-mean(apply(X=sig.product[min.max.secdif.30,], FUN=mean, MARGIN=1))
power.product[5]<-mean(apply(X=sig.product[min.max.secdif.40,], FUN=mean, MARGIN=1))
power.product[6]<-mean(apply(X=sig.product[min.max.secdif.50,], FUN=mean, MARGIN=1))


par(mar=c(5,5,4,2))
second.difference<-c(0, 0.1, 0.2, 0.3, 0.4, 0.5)
plot(power.product~second.difference, type="l", ylim=c(-0.03, 0.82), xlab=expression(paste("true ", abs(Delta*Delta[" min-max "]))), ylab=expression(paste("Pr(estimated product term is stat. significant)")))
abline(h=0.05, lty=3)
#dev.copy2pdf(file="product-power.pdf")

product.size<-apply(X=sig.product[continuous.additive,], FUN=mean, MARGIN=1)
boxplot(product.size, xlab=expression(paste("true  ", abs(Delta*Delta[" min-max "]), " = 0")), ylab="Pr(estimated product term is stat. significant)")
#dev.copy2pdf(file="product-size.pdf")

# Footnote 30: proportion of significant results for min-max secdif = 0.5
power.product[6]    # 0.1241667
power.secdif[6]     # 0.125






####################################################
# confidence interval accuracy for continuous DGPs
# probit model
# N = 100
####################################################

rm(list=ls())
library(foreign)
library(mvtnorm)
library(snow)

NODES<-2 #number of cluster nodes to establish
cl <- makeCluster(rep("localhost", NODES), type = "SOCK")
clusterSetupRNG (cl, type = "RNGstream", seed=rep(398123, 6))


source(file="trupry_2013-8-18.r")



#####################
# Marginal Effect Calculation Function
#####################

me<-function(X, beta, dir, form, link.name){
  
  x1<-X[,1]
  x2<-X[,2]
  
  if(dir=="x1"){
    x1.1<-x1+0.01 
    x1.2<-x1-0.01
    x2.1<-x2
    x2.2<-x2
    
    predict.dat.1<-as.data.frame(cbind(x1.1,x2.1))
    names(predict.dat.1)<-c("x1","x2")
    predict.dat.2<-as.data.frame(cbind(x1.2,x2.2))
    names(predict.dat.2)<-c("x1","x2")
    
    mm1<-model.matrix(form,predict.dat.1)
    mm2<-model.matrix(form,predict.dat.2)
    
    if(link.name=="regression" | link.name == "loglog" | link.name == "logc"){
      
      if(link.name=="regression"){
        link.fun<-gaussian(link = "identity")$linkinv
      }
      
      if(link.name=="loglog"){
        link.fun<-function(theta){exp(-exp(-theta))}
      }
      
      if(link.name=="logc"){
        link.fun<-function(theta){logc(theta, bvalue=1-1e-5, inverse=T)}
      }
      
    }else{
      link.fun<-binomial(link = link.name)$linkinv
    }
    
    xb1<-mm1%*%beta
    pry1<-link.fun(xb1)
    
    xb2<-mm2%*%beta
    pry2<-link.fun(xb2)
    
    return(as.vector((pry1-pry2)/0.02))
    
  }
  
  if(dir=="x2"){
    x1.1<-x1
    x1.2<-x1
    x2.1<-x2+0.01
    x2.2<-x2-0.01
    
    predict.dat.1<-as.data.frame(cbind(x1.1,x2.1))
    names(predict.dat.1)<-c("x1","x2")
    predict.dat.2<-as.data.frame(cbind(x1.2,x2.2))
    names(predict.dat.2)<-c("x1","x2")
    
    mm1<-model.matrix(form,predict.dat.1)
    mm2<-model.matrix(form,predict.dat.2)
    
    if(link.name=="regression" | link.name == "loglog" | link.name == "logc"){
      
      if(link.name=="regression"){
        link.fun<-gaussian(link = "identity")$linkinv
      }
      
      if(link.name=="loglog"){
        link.fun<-function(theta){exp(-exp(-theta))}
      }
      
      if(link.name=="logc"){
        link.fun<-function(theta){logc(theta, bvalue=1-1e-5, inverse=T)}
      }
      
    }else{
      link.fun<-binomial(link = link.name)$linkinv
    }
    
    xb1<-mm1%*%beta
    pry1<-link.fun(xb1)
    
    xb2<-mm2%*%beta
    pry2<-link.fun(xb2)
    
    return(as.vector((pry1-pry2)/0.02))
    
  }
  
}

clusterExport(cl, list="me")

me.shell.fun<-function(beta, form, link.name){
  return(c(me(cbind(0.5,0.5), beta, "x1", form, link.name), me(cbind(0.5,0.5), beta, "x2", form, link.name)))
}

me.dif<-function(beta, form, link.name){
  
  x1.1<-c(.5)
  x2.1<-c(1)
  X.1<-cbind(x1.1,x2.1)
  
  x1.2<-c(.5)
  x2.2<-c(0)
  X.2<-cbind(x1.2,x2.2)
  
  me.1.1<-me(X.1, beta=beta, dir="x1", form, link.name)
  me.2.1<-me(X.2, beta=beta, dir="x1", form, link.name)
  
  x1.1<-c(1)
  x2.1<-c(.5)
  X.1<-cbind(x1.1,x2.1)
  
  x1.2<-c(0)
  x2.2<-c(.5)
  X.2<-cbind(x1.2,x2.2)
  
  me.1.2<-me(X.1, beta=beta, dir="x2", form, link.name)
  me.2.2<-me(X.2, beta=beta, dir="x2", form, link.name)
  
  me.1<-c(me.1.1, me.1.2)
  me.2<-c(me.2.1, me.2.2)
  
  return(me.1-me.2)
  
}


me.true<-function(X, r, dir){
  
  x1<-X[,1]
  x2<-X[,2]
  
  if(dir=="x1"){
    
    x1.1<-x1+0.01 
    x1.2<-x1-0.01
    x2.1<-x2
    x2.2<-x2
    
    return(as.vector((trupry(r,x1.1,x2.1)-trupry(r,x1.2,x2.2))/0.02))
    
  }
  
  if(dir=="x2"){
    
    x1.1<-x1
    x1.2<-x1
    x2.1<-x2+0.01
    x2.2<-x2-0.01
    
    return(as.vector((trupry(r,x1.1,x2.1)-trupry(r,x1.2,x2.2))/0.02))
    
  }
  
}


me.dif.true<-function(r){
  
  x1.1<-c(.5)
  x2.1<-c(1)
  X.1<-cbind(x1.1,x2.1)
  
  x1.2<-c(.5)
  x2.2<-c(0)
  X.2<-cbind(x1.2,x2.2)
  
  me.1.1<-me.true(X.1, r, dir="x1")
  me.2.1<-me.true(X.2, r, dir="x1")
  
  x1.1<-c(1)
  x2.1<-c(.5)
  X.1<-cbind(x1.1,x2.1)
  
  x1.2<-c(0)
  x2.2<-c(.5)
  X.2<-cbind(x1.2,x2.2)
  
  me.1.2<-me.true(X.1, r, dir="x2")
  me.2.2<-me.true(X.2, r, dir="x2")
  
  me.1<-c(me.1.1, me.1.2)
  me.2<-c(me.2.1, me.2.2)
  
  return(me.1-me.2)
  
}

second.dif<-function(beta, form, link.name){
  
  x1.1<-c(1,0,1,0)
  x2.1<-c(1,1,0,0)
  
  x1.2<-c(.75,.25,.75,.25)
  x2.2<-c(.75,.75,.25,.25)
  
  predict.dat.1<-as.data.frame(cbind(x1.1,x2.1))
  names(predict.dat.1)<-c("x1","x2")
  
  predict.dat.2<-as.data.frame(cbind(x1.2,x2.2))
  names(predict.dat.2)<-c("x1","x2")
  
  
  mm1<-model.matrix(form,predict.dat.1)
  mm2<-model.matrix(form,predict.dat.2)
  
  
  if(link.name=="regression" | link.name == "loglog" | link.name == "logc"){
    
    if(link.name=="regression"){
      link.fun<-gaussian(link = "identity")$linkinv
    }
    
    if(link.name=="loglog"){
      link.fun<-function(theta){exp(-exp(-theta))}
    }
    
    if(link.name=="logc"){
      link.fun<-function(theta){logc(theta, bvalue=1-1e-5, inverse=T)}
    }
    
  }else{
    link.fun<-binomial(link = link.name)$linkinv
  }
  
  xb1<-mm1%*%beta
  pry1<-link.fun(xb1)
  
  xb2<-mm2%*%beta
  pry2<-link.fun(xb2)
  
  
  return(c((pry1[1]-pry1[2])-(pry1[3]-pry1[4]), (pry2[1]-pry2[2])-(pry2[3]-pry2[4])))
  
}



second.dif.true<-function(r){
  
  x1.1<-c(1,0,1,0)
  x2.1<-c(1,1,0,0)
  
  pry1<-trupry(r,x1.1,x2.1)
  
  return((pry1[1]-pry1[2])-(pry1[3]-pry1[4]))
  
}

me.ci<-c()
secdif.ci<-c()

for(i in 1:115){
  
  cat("Data Set: ", i, "\n")
  if(i<10){master.dat<-read.dta(file=paste("sim0",i,".dta", sep = ""))
  }else{master.dat<-read.dta(file=paste("sim",i,".dta", sep = ""))}
  
  me.ci.t<-matrix(data=NA, nrow=100, ncol=2)
  secdif.ci.t<-c()
  
  pb<-txtProgressBar(initial=0, min=1, max=100, style=3)
  
  for(j in 1:100){
    
    setTxtProgressBar(pb, value=j)
    
    baby.dat<-master.dat[(1+100*(j-1)):(100*j),]
    model<-glm(y~x1+x2+x1x2, family=binomial(link=probit), data=baby.dat)
    
    b.t<-rmvnorm(1000, mean=coefficients(model), sigma=vcov(model))
    
    
    me.dif.draws<-t(parApply(cl, X=b.t, FUN=me.dif, MARGIN=1, form=formula(~x1+x2+I(x1*x2)), link.name="probit"))
    second.dif.draws<-t(parApply(cl, X=b.t, FUN=second.dif, MARGIN=1, form=formula(~x1+x2+I(x1*x2)), link.name="probit"))
    
    me.dif.observed<-me.dif.true(i)
    second.dif.observed<-second.dif.true(i)
    
    me.dif.q <- rbind(quantile(me.dif.draws[,1], probs=c(0.025, 0.975)), quantile(me.dif.draws[,2], probs=c(0.025, 0.975)))
    second.dif.q<-quantile(second.dif.draws, probs=c(0.025, 0.975))
    
    me.ci.t[j,]<-c(ifelse(me.dif.q[1,1]<=me.dif.observed[1] & me.dif.q[1,2]>=me.dif.observed[1], 1, 0), ifelse(me.dif.q[2,1]<=me.dif.observed[2] & me.dif.q[2,2]>=me.dif.observed[2], 1, 0))
    secdif.ci.t[j]<-ifelse(second.dif.q[1]<=second.dif.observed & second.dif.q[2]>=second.dif.observed, 1, 0)
    
    
  }
  close(pb)
  
  me.ci[i]<-sum(me.ci.t)/(dim(me.ci.t)[1]*dim(me.ci.t)[2])
  secdif.ci[i]<-sum(secdif.ci.t)/length(secdif.ci.t)
  
}

save.image("confidence-interval-accuracy-workspace-2013-12-15.RData")

continuous.additive<-1:49
min.max.secdif.10<-50:61
min.max.secdif.20<-62:73
min.max.secdif.30<-74:91
min.max.secdif.40<-92:103
min.max.secdif.50<-104:115
analysis.indices<-list(continuous.additive, min.max.secdif.10, min.max.secdif.20, min.max.secdif.30, min.max.secdif.40, min.max.secdif.50)

# Footnote 25
mean(me.ci[analysis.indices[[1]]]) # 0.9416327
mean(me.ci[c(analysis.indices[[2]], analysis.indices[[3]], analysis.indices[[4]])]) # 0.9471429
mean(me.ci[c(analysis.indices[[5]], analysis.indices[[6]])]) # 0.940625

# Footnote 21
mean(secdif.ci[analysis.indices[[1]]]) # 0.9420408
mean(secdif.ci[c(analysis.indices[[2]], analysis.indices[[3]], analysis.indices[[4]])]) # 0.9378571
mean(secdif.ci[c(analysis.indices[[5]], analysis.indices[[6]])]) # 0.925














###################################
#
# Part Five: DGP drawings
#
###################################

rm(list=ls())
setwd("J:/Dropbox/jesarey_documents/BRE Logit Misspecification Paper/Final Replication Package")

# draw some true dgp plots
source("trupry_2013-8-18.r")
x<-seq(from=0, to=1, by=0.1)
pry<-trupry(15, x, 0)
plot(pry~x, type="l", main="DGP #15", xlab="X", ylab="Pr(Y)", ylim=c(0,1), cex.lab=1.5, cex.axis=1.5, cex.main=1.5, cex.sub=1.5)
pry<-trupry(15, x, 0.5)
lines(pry~x, lty=2)
pry<-trupry(15, x, 1)
lines(pry~x, lty=3)
dev.copy(pdf, "linear-additive-1.pdf")
dev.off()

x<-seq(from=0, to=1, by=0.1)
pry<-trupry(20, x, 0)
plot(pry~x, type="l", main = "DGP #20", xlab="X", ylab="Pr(Y)", ylim=c(0,1), cex.lab=1.5, cex.axis=1.5, cex.main=1.5, cex.sub=1.5)
pry<-trupry(20, x, 0.5)
lines(pry~x, lty=2)
pry<-trupry(20, x, 1)
lines(pry~x, lty=3)
dev.copy(pdf, "linear-additive-2.pdf")
dev.off()





x<-seq(from=0, to=1, by=0.1)
pry<-trupry(64, x, 0)
plot(pry~x, type="l", main="DGP # 64", xlab="X", ylab="Pr(Y)", ylim=c(0,1), cex.lab=1.5, cex.axis=1.5, cex.main=1.5, cex.sub=1.5)
pry<-trupry(64, x, 0.5)
lines(pry~x, lty=2)
pry<-trupry(64, x, 1)
lines(pry~x, lty=3)
dev.copy(pdf, "linear-interactive-1.pdf")
dev.off()


x<-seq(from=0, to=1, by=0.1)
pry<-trupry(74, x, 0)
plot(pry~x, main = "DGP #74", type="l", xlab="X", ylab="Pr(Y)", ylim=c(0,1), cex.lab=1.5, cex.axis=1.5, cex.main=1.5, cex.sub=1.5)
pry<-trupry(74, x, 0.5)
lines(pry~x, lty=2)
pry<-trupry(74, x, 1)
lines(pry~x, lty=3)
legend("topright", lty=c(1,2,3), legend=c("Z = 0    ", "Z = 0.5    ", "Z = 1    "))
dev.copy(pdf, "linear-interactive-2.pdf")
dev.off()







x<-seq(from=0, to=1, by=0.1)
pry<-trupry(40, x, 0)
plot(pry~x, main="DGP #40", type="l", xlab="X", ylab="Pr(Y)", ylim=c(0,1), cex.lab=1.5, cex.axis=1.5, cex.main=1.5, cex.sub=1.5)
pry<-trupry(40, x, 0.5)
lines(pry~x, lty=2)
pry<-trupry(40, x, 1)
lines(pry~x, lty=3)

dev.copy(pdf, "nonlinear-additive-1.pdf")
dev.off()


x<-seq(from=0, to=1, by=0.1)
pry<-trupry(45, x, 0)
plot(pry~x, main = "DGP #45", type="l", xlab="X", ylab="Pr(Y)", ylim=c(0,1), cex.lab=1.5, cex.axis=1.5, cex.main=1.5, cex.sub=1.5)
pry<-trupry(45, x, 0.5)
lines(pry~x, lty=2)
pry<-trupry(45, x, 1)
lines(pry~x, lty=3)

dev.copy(pdf, "nonlinear-additive-2.pdf")
dev.off()





x<-seq(from=0, to=1, by=0.1)
pry<-trupry(66, x, 0)
plot(pry~x, type="l", main="DGP #66", xlab="X", ylab="Pr(Y)", ylim=c(0,1), cex.lab=1.5, cex.axis=1.5, cex.main=1.5, cex.sub=1.5)
pry<-trupry(66, x, 0.5)
lines(pry~x, lty=2)
pry<-trupry(66, x, 1)
lines(pry~x, lty=3)

dev.copy(pdf, "nonlinear-interactive-1.pdf")
dev.off()


x<-seq(from=0, to=1, by=0.1)
pry<-trupry(73, x, 0)
plot(pry~x, type="l", main="DGP #73", xlab="X", ylab="Pr(Y)", ylim=c(0,1), cex.lab=1.5, cex.axis=1.5, cex.main=1.5, cex.sub=1.5)
pry<-trupry(73, x, 0.5)
lines(pry~x, lty=2)
pry<-trupry(73, x, 1)
lines(pry~x, lty=3)

dev.copy(pdf, "nonlinear-interactive-2.pdf")
dev.off()


# this code creates drawings of every DGP 
# used to create Figure 1

par(mfrow=c(3,3))
par(mar=c(5, 4, 3, 2), las=1)
k=0
j=0
leg.loc<-c("topright", "topright", "bottomright", "topright", "bottomright", "topleft", "topright", "topleft", "bottomright", "topright", "bottomright", "bottomright", "topright")
for(i in 1:115){
  
  x<-seq(from=0, to=1, by=0.1)
  pry<-trupry(i, x, 0)
  plot(pry~x, type="l", xlab="X", ylab="Pr(Y)", main=paste("DGP #", i, sep=""), ylim=c(0,1))
  pry<-trupry(i, x, 0.5)
  lines(pry~x, lty=2)
  pry<-trupry(i, x, 1)
  lines(pry~x, lty=3)
  if(max(i==(9*c(1:13)-8))){j=j+1; legend(leg.loc[j], lty=c(1,2,3), legend=c("Z = 0    ", "Z = 0.5    ", "Z = 1    "))}
  if(max(i==9*c(1:12))){k=k+1; dev.copy2pdf(file=paste("alldgp_appendix_", k, ".pdf", sep=""))}     
  
}
dev.copy2pdf(file=paste("alldgp_appendix_13.pdf", sep=""))
